def init_table_manager_and_federation(cls, job_id, role, num_hosts, host_ind=0):
    from arch.api import session
    from arch.api import federation

    role_id = {
        "host": [10000 + i for i in range(num_hosts)],
        "guest": [9999],
        "arbiter": [9999]
    }
    session.init(job_id)
    federation.init(job_id,
                    {"local": {
                        "role": role,
                        "party_id": role_id[role][0] if role != "host" else role_id[role][host_ind]
                    },
                     "role": role_id})
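A minimal usage sketch for the helper above, assuming it is bound as a classmethod on some test-runner class (TestRunner is a hypothetical name); each party process calls it with its own role and, for hosts, its host index:

# Hypothetical binding: in the original codebase this function lives on a class.
class TestRunner:
    init_table_manager_and_federation = classmethod(init_table_manager_and_federation)

TestRunner.init_table_manager_and_federation("job_demo", role="guest", num_hosts=2)             # guest, party 9999
TestRunner.init_table_manager_and_federation("job_demo", role="host", num_hosts=2, host_ind=1)  # second host, party 10001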
def _init_argument(self):
    # LOGGER_path, config_path and job_id are expected to be defined at module level.
    self._init_LOGGER(LOGGER_path)
    self._initialize(config_path)
    with open(config_path) as conf_f:
        runtime_json = json.load(conf_f)
    eggroll.init(job_id)
    federation.init(job_id, runtime_json)
def test_model(role1, role2):
    # job_id is expected to be defined at module level.
    with open("%s_runtime_conf.json" % role1) as conf_fr:
        runtime_conf = json.loads(conf_fr.read())
    federation.init(job_id=job_id, runtime_conf=runtime_conf)
    print(federation.get_field("role"))

    model_meta_save = ModelMeta()
    model_meta_save.name = "HeteroLR%s" % (role2)
    commit_id = save_model("model_meta", model_meta_save, commit_log="xxx")
    print("save guest model success, commit id is %s" % commit_id)

    model_meta_read = ModelMeta()
    read_model("model_meta", model_meta_read)
    print(model_meta_read)

    model_param_save = ModelParam()
    model_param_save.weight["k1"] = 1
    model_param_save.weight["k2"] = 2
    commit_id = save_model("model_param", model_param_save, commit_log="xxx")
    print("save guest model success, commit id is %s" % commit_id)

    # read
    model_param_read = ModelParam()
    read_model("model_param", model_param_read)
    print(model_param_read)

    data_transform = DataTransformServer()
    data_transform.missing_replace_method = "xxxx"
    save_model("data_transform", data_transform)
def _init_argument(self):
    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--config', required=True, type=str, help="Specify a config json file path")
    parser.add_argument('-j', '--job_id', type=str, required=True, help="Specify the job id")
    # parser.add_argument('-p', '--party_id', type=str, required=True, help="Specify the party id")
    # parser.add_argument('-l', '--LOGGER_path', type=str, required=True, help="Specify the LOGGER path")
    args = parser.parse_args()
    config_path = args.config
    self.config_path = config_path
    if not args.config:
        LOGGER.error("Config File should be provided")
        exit(-100)
    self.job_id = args.job_id

    all_checker = AllChecker(config_path)
    all_checker.check_all()

    self._initialize(config_path)
    with open(config_path) as conf_f:
        runtime_json = json.load(conf_f)
    eggroll.init(self.job_id, self.workflow_param.work_mode)
    LOGGER.debug("The job id is {}".format(self.job_id))
    federation.init(self.job_id, runtime_json)
    LOGGER.debug("Finish eggroll and federation init")
    self._init_pipeline()
def _init_argument(self):
    # config_path and job_id are expected to be defined at module level.
    self._initialize(config_path)
    with open(config_path) as conf_f:
        runtime_json = json.load(conf_f)
    LOGGER.debug("The Guest job id is {}".format(job_id))
    LOGGER.debug("The Guest work mode id is {}".format(self.workflow_param.work_mode))
    eggroll.init(job_id, self.workflow_param.work_mode)
    federation.init(job_id, runtime_json)
    LOGGER.debug("Finish eggroll and federation init")
def test_remote(self):
    table = session.table(name='remote_name', namespace='remote_namespace', partition=1)
    table.put_all(range(12))

    from arch.api import federation
    federation.init(session.get_session_id(), runtime_conf=None)
    federation.remote(table, name="roll_pair_name.table", tag="roll_pair_tag")
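On the receiving party, the same name/tag pair is pulled back; a sketch assuming the classic arch.api federation API, where get(name, tag, idx) returns the object remoted above:

from arch.api import federation

# Pull the table remoted above from party index 0 (assumed idx semantics).
table = federation.get(name="roll_pair_name.table", tag="roll_pair_tag", idx=0)
for k, v in table.collect():
    print(k, v)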
def init_session_and_federation(job_id, role, partyid, partyid_map):
    from arch.api import session
    from arch.api import federation

    session.init(job_id)
    federation.init(job_id=job_id,
                    runtime_conf={
                        "local": {
                            "role": role,
                            "party_id": partyid
                        },
                        "role": partyid_map
                    })
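For example, a one-guest, two-host run builds the party map once and hands each process its own party id (ids below are illustrative):

# Illustrative party map shared by every process; each passes its own role and id.
partyid_map = {"guest": [9999], "host": [10000, 10001]}
# in the guest process:
init_session_and_federation("job_demo", role="guest", partyid=9999, partyid_map=partyid_map)
# in the second host process:
init_session_and_federation("job_demo", role="host", partyid=10001, partyid_map=partyid_map)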
def session_init(job_id, idx):
    from arch.api import session
    from arch.api import federation

    role = "guest" if idx < 1 else "host"
    party_id = 9999 + idx if idx < 1 else 10000 + (idx - 1)
    role_parties = {
        "host": [10000 + i for i in range(NUM_HOSTS)],
        "guest": [9999 + i for i in range(1)]
    }
    session.init(job_id)
    federation.init(job_id,
                    dict(local=dict(role=role, party_id=party_id),
                         role=role_parties))
    return federation.local_party(), federation.all_parties()
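The index convention maps idx 0 to the guest and idx >= 1 to host number idx - 1; a sketch of a caller, assuming NUM_HOSTS is the module-level constant read by session_init:

NUM_HOSTS = 2  # assumed module-level constant

# idx 0 -> guest party 9999; idx 1 -> host party 10000; idx 2 -> host party 10001
local_party, all_parties = session_init("job_demo", idx=0)
print(local_party)   # this process's Party handle
print(all_parties)   # all participating parties, guest and hosts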
def _init_argument(self, config_json, job_id):
    self.config_json = config_json
    self.job_id = job_id

    from federatedml.param import param
    self.valid_classes = [class_info[0] for class_info in inspect.getmembers(param, inspect.isclass)]

    # home_dir = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))
    # param_validation_path = home_dir + "/conf/param_validation.json"
    # all_checker = AllChecker(config_json, param_validation_path)
    # all_checker.check_all()
    # LOGGER.debug("Finish all parameter checkers")
    self._initialize()
    eggroll.init(self.job_id, self.workflow_param.work_mode)
    LOGGER.debug("The job id is {}".format(self.job_id))
    federation.init(self.job_id, self.config_json)
    LOGGER.debug("Finish eggroll and federation init")
    self._init_pipeline()
def run_task():
    task = Task()
    task.f_create_time = current_timestamp()
    try:
        parser = argparse.ArgumentParser()
        parser.add_argument('-j', '--job_id', required=True, type=str, help="job id")
        parser.add_argument('-n', '--component_name', required=True, type=str, help="component name")
        parser.add_argument('-t', '--task_id', required=True, type=str, help="task id")
        parser.add_argument('-r', '--role', required=True, type=str, help="role")
        parser.add_argument('-p', '--party_id', required=True, type=str, help="party id")
        parser.add_argument('-c', '--config', required=True, type=str, help="task config")
        parser.add_argument('--job_server', help="job server", type=str)
        args = parser.parse_args()
        schedule_logger.info('enter task process')
        schedule_logger.info(args)
        # init function args
        if args.job_server:
            RuntimeConfig.init_config(HTTP_PORT=args.job_server.split(':')[1])
        job_id = args.job_id
        component_name = args.component_name
        task_id = args.task_id
        role = args.role
        party_id = int(args.party_id)
        task_config = file_utils.load_json_conf(args.config)
        job_parameters = task_config['job_parameters']
        job_initiator = task_config['job_initiator']
        job_args = task_config['job_args']
        task_input_dsl = task_config['input']
        task_output_dsl = task_config['output']
        parameters = task_config['parameters']
        module_name = task_config['module_name']
    except Exception as e:
        schedule_logger.exception(e)
        task.f_status = TaskStatus.FAILED
        return
    try:
        # init environment, process is shared globally
        RuntimeConfig.init_config(WORK_MODE=job_parameters['work_mode'])
        storage.init_storage(job_id=task_id, work_mode=RuntimeConfig.WORK_MODE)
        federation.init(job_id=task_id, runtime_conf=parameters)
        job_log_dir = os.path.join(job_utils.get_job_log_directory(job_id=job_id), role, str(party_id))
        task_log_dir = os.path.join(job_log_dir, component_name)
        log_utils.LoggerFactory.set_directory(directory=task_log_dir, parent_log_dir=job_log_dir,
                                              append_to_parent_log=True, force=True)

        task.f_job_id = job_id
        task.f_component_name = component_name
        task.f_task_id = task_id
        task.f_role = role
        task.f_party_id = party_id
        task.f_operator = 'python_operator'
        tracker = Tracking(job_id=job_id, role=role, party_id=party_id, component_name=component_name,
                           task_id=task_id,
                           model_id=job_parameters['model_id'],
                           model_version=job_parameters['model_version'],
                           module_name=module_name)
        task.f_start_time = current_timestamp()
        task.f_run_ip = get_lan_ip()
        task.f_run_pid = os.getpid()
        run_class_paths = parameters.get('CodePath').split('/')
        run_class_package = '.'.join(run_class_paths[:-2]) + '.' + run_class_paths[-2].replace('.py', '')
        run_class_name = run_class_paths[-1]
        task_run_args = TaskExecutor.get_task_run_args(job_id=job_id, role=role, party_id=party_id,
                                                       job_parameters=job_parameters, job_args=job_args,
                                                       input_dsl=task_input_dsl)
        run_object = getattr(importlib.import_module(run_class_package), run_class_name)()
        run_object.set_tracker(tracker=tracker)
        run_object.set_taskid(taskid=task_id)
        task.f_status = TaskStatus.RUNNING
        TaskExecutor.sync_task_status(job_id=job_id, component_name=component_name, task_id=task_id, role=role,
                                      party_id=party_id,
                                      initiator_party_id=job_initiator.get('party_id', None),
                                      task_info=task.to_json())

        schedule_logger.info('run {} {} {} {} {} task'.format(job_id, component_name, task_id, role, party_id))
        schedule_logger.info(parameters)
        schedule_logger.info(task_input_dsl)
        run_object.run(parameters, task_run_args)
        if task_output_dsl:
            if task_output_dsl.get('data', []):
                output_data = run_object.save_data()
                tracker.save_output_data_table(output_data, task_output_dsl.get('data')[0])
            if task_output_dsl.get('model', []):
                output_model = run_object.export_model()
                # There is only one model output at the current dsl version.
                tracker.save_output_model(output_model, task_output_dsl['model'][0])
        task.f_status = TaskStatus.SUCCESS
    except Exception as e:
        schedule_logger.exception(e)
        task.f_status = TaskStatus.FAILED
    finally:
        try:
            task.f_end_time = current_timestamp()
            task.f_elapsed = task.f_end_time - task.f_start_time
            task.f_update_time = current_timestamp()
            TaskExecutor.sync_task_status(job_id=job_id, component_name=component_name, task_id=task_id, role=role,
                                          party_id=party_id,
                                          initiator_party_id=job_initiator.get('party_id', None),
                                          task_info=task.to_json())
        except Exception as e:
            schedule_logger.exception(e)
        schedule_logger.info(
            'finish {} {} {} {} {} {} task'.format(job_id, component_name, task_id, role, party_id, task.f_status))
        print('finish {} {} {} {} {} {} task'.format(job_id, component_name, task_id, role, party_id, task.f_status))
def run_task():
    task = Task()
    task.f_create_time = current_timestamp()
    try:
        parser = argparse.ArgumentParser()
        parser.add_argument('-j', '--job_id', required=True, type=str, help="job id")
        parser.add_argument('-n', '--component_name', required=True, type=str, help="component name")
        parser.add_argument('-t', '--task_id', required=True, type=str, help="task id")
        parser.add_argument('-r', '--role', required=True, type=str, help="role")
        parser.add_argument('-p', '--party_id', required=True, type=str, help="party id")
        parser.add_argument('-c', '--config', required=True, type=str, help="task config")
        parser.add_argument('--processors_per_node', help="processors_per_node", type=int)
        parser.add_argument('--job_server', help="job server", type=str)
        args = parser.parse_args()
        schedule_logger(args.job_id).info('enter task process')
        schedule_logger(args.job_id).info(args)
        # init function args
        if args.job_server:
            RuntimeConfig.init_config(HTTP_PORT=args.job_server.split(':')[1])
        RuntimeConfig.set_process_role(ProcessRole.EXECUTOR)
        job_id = args.job_id
        component_name = args.component_name
        task_id = args.task_id
        role = args.role
        party_id = int(args.party_id)
        executor_pid = os.getpid()
        task_config = file_utils.load_json_conf(args.config)
        job_parameters = task_config['job_parameters']
        job_initiator = task_config['job_initiator']
        job_args = task_config['job_args']
        task_input_dsl = task_config['input']
        task_output_dsl = task_config['output']
        component_parameters = TaskExecutor.get_parameters(job_id, component_name, role, party_id)
        task_parameters = task_config['task_parameters']
        module_name = task_config['module_name']
        TaskExecutor.monkey_patch()
    except Exception as e:
        traceback.print_exc()
        schedule_logger().exception(e)
        task.f_status = TaskStatus.FAILED
        return
    try:
        job_log_dir = os.path.join(job_utils.get_job_log_directory(job_id=job_id), role, str(party_id))
        task_log_dir = os.path.join(job_log_dir, component_name)
        log_utils.LoggerFactory.set_directory(directory=task_log_dir, parent_log_dir=job_log_dir,
                                              append_to_parent_log=True, force=True)

        task.f_job_id = job_id
        task.f_component_name = component_name
        task.f_task_id = task_id
        task.f_role = role
        task.f_party_id = party_id
        task.f_operator = 'python_operator'
        tracker = Tracking(job_id=job_id, role=role, party_id=party_id, component_name=component_name,
                           task_id=task_id,
                           model_id=job_parameters['model_id'],
                           model_version=job_parameters['model_version'],
                           component_module_name=module_name)
        task.f_start_time = current_timestamp()
        task.f_run_ip = get_lan_ip()
        task.f_run_pid = executor_pid
        run_class_paths = component_parameters.get('CodePath').split('/')
        run_class_package = '.'.join(run_class_paths[:-2]) + '.' + run_class_paths[-2].replace('.py', '')
        run_class_name = run_class_paths[-1]
        task.f_status = TaskStatus.RUNNING
        TaskExecutor.sync_task_status(job_id=job_id, component_name=component_name, task_id=task_id, role=role,
                                      party_id=party_id,
                                      initiator_party_id=job_initiator.get('party_id', None),
                                      initiator_role=job_initiator.get('role', None),
                                      task_info=task.to_json())

        # init environment, process is shared globally
        RuntimeConfig.init_config(WORK_MODE=job_parameters['work_mode'],
                                  BACKEND=job_parameters.get('backend', 0))
        if args.processors_per_node and args.processors_per_node > 0 and RuntimeConfig.BACKEND == Backend.EGGROLL:
            session_options = {"eggroll.session.processors.per.node": args.processors_per_node}
        else:
            session_options = {}
        session.init(job_id=job_utils.generate_session_id(task_id, role, party_id),
                     mode=RuntimeConfig.WORK_MODE,
                     backend=RuntimeConfig.BACKEND,
                     options=session_options)
        federation.init(job_id=task_id, runtime_conf=component_parameters)

        schedule_logger().info('run {} {} {} {} {} task'.format(job_id, component_name, task_id, role, party_id))
        schedule_logger().info(component_parameters)
        schedule_logger().info(task_input_dsl)
        task_run_args = TaskExecutor.get_task_run_args(job_id=job_id, role=role, party_id=party_id,
                                                       task_id=task_id,
                                                       job_args=job_args,
                                                       job_parameters=job_parameters,
                                                       task_parameters=task_parameters,
                                                       input_dsl=task_input_dsl,
                                                       if_save_as_task_input_data=job_parameters.get(
                                                           "save_as_task_input_data", SAVE_AS_TASK_INPUT_DATA_SWITCH))
        run_object = getattr(importlib.import_module(run_class_package), run_class_name)()
        run_object.set_tracker(tracker=tracker)
        run_object.set_taskid(taskid=task_id)
        run_object.run(component_parameters, task_run_args)
        output_data = run_object.save_data()
        tracker.save_output_data_table(output_data,
                                       task_output_dsl.get('data')[0] if task_output_dsl.get('data') else 'component')
        output_model = run_object.export_model()
        # There is only one model output at the current dsl version.
        tracker.save_output_model(output_model,
                                  task_output_dsl['model'][0] if task_output_dsl.get('model') else 'default')
        task.f_status = TaskStatus.COMPLETE
    except Exception as e:
        task.f_status = TaskStatus.FAILED
        schedule_logger().exception(e)
    finally:
        sync_success = False
        try:
            task.f_end_time = current_timestamp()
            task.f_elapsed = task.f_end_time - task.f_start_time
            task.f_update_time = current_timestamp()
            TaskExecutor.sync_task_status(job_id=job_id, component_name=component_name, task_id=task_id, role=role,
                                          party_id=party_id,
                                          initiator_party_id=job_initiator.get('party_id', None),
                                          initiator_role=job_initiator.get('role', None),
                                          task_info=task.to_json())
            sync_success = True
        except Exception as e:
            traceback.print_exc()
            schedule_logger().exception(e)
        schedule_logger().info('task {} {} {} start time: {}'.format(task_id, role, party_id,
                                                                     timestamp_to_date(task.f_start_time)))
        schedule_logger().info('task {} {} {} end time: {}'.format(task_id, role, party_id,
                                                                   timestamp_to_date(task.f_end_time)))
        schedule_logger().info('task {} {} {} takes {}s'.format(task_id, role, party_id,
                                                                int(task.f_elapsed) / 1000))
        schedule_logger().info(
            'finish {} {} {} {} {} {} task'.format(job_id, component_name, task_id, role, party_id,
                                                   task.f_status if sync_success else TaskStatus.FAILED))
        print('finish {} {} {} {} {} {} task'.format(job_id, component_name, task_id, role, party_id,
                                                     task.f_status if sync_success else TaskStatus.FAILED))
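Given the argparse definitions above, the executor runs as one subprocess per task; an illustrative launch (script name, ids and config path are made-up examples):

import subprocess

subprocess.run([
    "python", "task_executor.py",                    # assumed script name
    "-j", "20200101000000001",                       # job id
    "-n", "hetero_lr_0",                             # component name
    "-t", "20200101000000001_hetero_lr_0",           # task id
    "-r", "guest",                                   # role
    "-p", "9999",                                    # party id
    "-c", "/data/jobs/20200101000000001/task_config.json",  # task config (hypothetical path)
])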
        for i in range(len(self.data)):
            ori_feature = [0 for i in range(len(tags))]
            for tag in self.data[i][1].split(" ", -1):
                ori_feature[tag_dict.get(tag)] = 1

            ori_feature = np.asarray(ori_feature, dtype='int')
            # element-wise tolerance check, reduced with all()
            self.assertTrue((np.abs(ori_feature - features) < consts.FLOAT_ZERO).all())


def save_data(input_data, table_name, namespace):
    storage.save_data(input_data, table_name, namespace)


if __name__ == '__main__':
    eggroll.init("test_dataio" + str(int(time.time())))
    federation.init("test_dataio",
                    {"local": {
                        "role": "guest",
                        "party_id": 10000
                    },
                     "role": {
                         "host": [9999],
                         "guest": [10000]
                     }})

    unittest.main()
        result_data = selection_guest.save_data()
        local_data = result_data.collect()
        print("data in transform")
        for k, v in local_data:
            print("k: {}, v: {}".format(k, v.features))

    def tearDown(self):
        self.table.destroy()


if __name__ == '__main__':
    import sys

    job_id = str(sys.argv[1])
    eggroll.init(job_id)
    federation.init(job_id,
                    {"local": {
                        "role": "guest",
                        "party_id": 9999
                    },
                     "role": {
                         "host": [10000],
                         "guest": [9999]
                     }})
    selection_obj = TestHeteroFeatureSelection()
    selection_obj.test_feature_selection()
                    default='10000')
parser.add_argument('-j', '--job_id', required=True, type=str, help="job_id")

args = parser.parse_args()
job_id = args.job_id
guest_id = args.gid
host_id = args.hid
role = args.role
session.init(job_id)
federation.init(job_id,
                {"local": {
                    "role": role,
                    "party_id": guest_id if role == GUEST else host_id
                },
                 "role": {
                     "host": [host_id],
                     "guest": [guest_id]
                 }})
test_obj = TestHeteroFeatureBinning(role, guest_id, host_id)
# homo_obj.test_homo_lr()
test_obj.test_feature_binning()
test_obj.tearDown()
        meta_obj = model.get('HomoLogisticRegressionMeta')
        print("HomoLR meta info")
        print(meta_obj)
        param_obj = model.get('HomoLogisticRegressionParam')
        print("HomoLR param info")
        print(param_obj)

    def tearDown(self):
        self.table.destroy()


if __name__ == '__main__':
    import sys

    job_id = str(sys.argv[1])
    eggroll.init(job_id)
    federation.init(job_id,
                    {"local": {
                        "role": "arbiter",
                        "party_id": 10000
                    },
                     "role": {
                         "host": [10000],
                         "guest": [9999],
                         "arbiter": [10000]
                     }})
    homo_obj = TestHomoLR()
    # homo_obj.test_homo_lr()
    homo_obj.test_cv()
def init_federation(self, job_id, conf):
    federation.init(job_id, conf)
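The conf argument is the same runtime configuration dict used throughout these snippets: a "local" section identifying this party plus a "role" section listing every party id per role. A minimal sketch (obj stands for a hypothetical instance of the enclosing class):

conf = {
    "local": {"role": "guest", "party_id": 9999},
    "role": {"guest": [9999], "host": [10000], "arbiter": [9998]}
}
obj.init_federation("job_demo", conf)  # obj: hypothetical instance of the enclosing class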
parser.add_argument('-j', '--job_id', required=True, type=str, help="job_id")

args = parser.parse_args()
job_id = args.job_id
own_party_id = args.pid
role = args.role
print("args: {}".format(args))
session.init(job_id)
federation.init(job_id,
                {"local": {
                    "role": role,
                    "party_id": own_party_id
                },
                 "role": {
                     "host": [str(x) for x in host_id_list],
                     "guest": ['9999'],
                     "arbiter": ['9998']
                 }})
test_obj = TestHomoFeatureBinning(role, own_party_id)
# homo_obj.test_homo_lr()
test_obj.test_homo_split_points()
test_obj.test_homo_split_points(is_sparse=True)
test_obj.tearDown()
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from arch.api import eggroll
from arch.api import federation

if __name__ == '__main__':
    eggroll.init("atest")
    federation.init("atest",
                    {"local": {
                        "role": "host",
                        "party_id": 10002
                    },
                     "role": {
                         "host": [10001, 10002],
                         "arbiter": [99999],
                         "guest": [10001]
                     }})

    for _tag in range(0, 1000, 2):
        c = eggroll.parallelize(range(_tag), partition=3, persistent=True).map(lambda k, v: (v, k + 1))
        print(c)
        a = _tag
        federation.remote(a, "RsaIntersectTransferVariable.rsa_pubkey", tag="{}".format(_tag))
        federation.remote(c, "RsaIntersectTransferVariable.rsa_pubkey",
                          tag="{}".format(_tag + 1))  # assumption: the second transfer uses the odd tag (_tag + 1)