def _init_argument(self):
    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--config', required=True, type=str,
                        help="Specify a config json file path")
    parser.add_argument('-j', '--job_id', type=str, required=True, help="Specify the job id")
    # parser.add_argument('-p', '--party_id', type=str, required=True, help="Specify the party id")
    # parser.add_argument('-l', '--LOGGER_path', type=str, required=True, help="Specify the LOGGER path")
    args = parser.parse_args()
    config_path = args.config
    self.config_path = config_path
    if not args.config:
        LOGGER.error("Config File should be provided")
        exit(-100)
    self.job_id = args.job_id
    home_dir = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))
    param_validation_path = home_dir + "/conf/param_validation.json"
    all_checker = AllChecker(config_path, param_validation_path)
    all_checker.check_all()
    LOGGER.debug("Finish all parameter checkers")
    self._initialize(config_path)
    with open(config_path) as conf_f:
        runtime_json = json.load(conf_f)
    session.init(self.job_id, self.workflow_param.work_mode)
    LOGGER.debug("The job id is {}".format(self.job_id))
    federation.init(self.job_id, runtime_json)
    LOGGER.debug("Finish eggroll and federation init")
    self._init_pipeline()
def setUp(self): session.init("test_cross_entropy") self.sigmoid_loss = SigmoidBinaryCrossEntropyLoss() self.y_list = [i % 2 for i in range(100)] self.predict_list = [random.random() for i in range(100)] self.y = session.parallelize(self.y_list, include_key=False, partition=16) self.predict = session.parallelize(self.predict_list, include_key=False, partition=16)
def run(self, component_parameters=None, args=None): self.parameters = component_parameters["DownloadParam"] self.parameters["role"] = component_parameters["role"] self.parameters["local"] = component_parameters["local"] table_name, namespace = dtable_utils.get_table_info(config=self.parameters, create=False) job_id = self.taskid.split("_")[0] session.init(job_id, self.parameters["work_mode"]) with open(os.path.abspath(self.parameters["output_path"]), "w") as fout: data_table = session.get_data_table(name=table_name, namespace=namespace) count = data_table.count() LOGGER.info('===== begin to export data =====') lines = 0 for key, value in data_table.collect(): if not value: fout.write(key + "\n") else: fout.write(key + self.parameters.get("delimitor", ",") + value + "\n") lines += 1 if lines % 2000 == 0: LOGGER.info("===== export {} lines =====".format(lines)) if lines % 10000 == 0: job_info = {'f_progress': lines/count*100//1} self.update_job_status(self.parameters["local"]['role'], self.parameters["local"]['party_id'], job_info) self.update_job_status(self.parameters["local"]['role'], self.parameters["local"]['party_id'], {'f_progress': 100}) self.callback_metric(metric_name='data_access', metric_namespace='download', metric_data=[Metric("count", data_table.count())]) LOGGER.info("===== export {} lines totally =====".format(lines)) LOGGER.info('===== export data finish =====') LOGGER.info('===== export data file path:{} ====='.format(os.path.abspath(self.parameters["output_path"])))
def setUp(self): session.init("test_instance") dense_inst = [] headers = ['x' + str(i) for i in range(20)] for i in range(100): inst = Instance(features=(i % 16 * np.ones(20))) dense_inst.append((i, inst)) self.dense_table = session.parallelize(dense_inst, include_key=True, partition=2) self.dense_table.schema = {'header': headers} self.sparse_inst = [] for i in range(100): dict = {} indices = [] data = [] for j in range(20): idx = random.randint(0, 29) if idx in dict: continue dict[idx] = 1 val = random.random() indices.append(idx) data.append(val) sparse_vec = SparseVector(indices, data, 30) self.sparse_inst.append((i, Instance(features=sparse_vec))) self.sparse_table = session.parallelize(self.sparse_inst, include_key=True) self.sparse_table.schema = {"header": ["fid" + str(i) for i in range(30)]}
def setUp(self): session.init("test_random_sampler") self.data = [(i * 10 + 5, i * i) for i in range(100)] self.table = session.parallelize(self.data, include_key=True) self.data_to_trans = [(i * 10 + 5, i * i * i) for i in range(100)] self.table_trans = session.parallelize(self.data_to_trans, include_key=True)
def run(self, component_parameters=None, args=None): self.parameters = component_parameters["UploadParam"] self.parameters["role"] = component_parameters["role"] self.parameters["local"] = component_parameters["local"] job_id = self.taskid.split("_")[0] if not os.path.isabs(self.parameters.get("file", "")): self.parameters["file"] = os.path.join(file_utils.get_project_base_directory(), self.parameters["file"]) if not os.path.exists(self.parameters["file"]): raise Exception("%s is not exist, please check the configure" % (self.parameters["file"])) table_name, namespace = dtable_utils.get_table_info(config=self.parameters, create=True) _namespace, _table_name = self.generate_table_name(self.parameters["file"]) if namespace is None: namespace = _namespace if table_name is None: table_name = _table_name read_head = self.parameters['head'] if read_head == 0: head = False elif read_head == 1: head = True else: raise Exception("'head' in conf.json should be 0 or 1") partition = self.parameters["partition"] if partition <= 0 or partition >= self.MAX_PARTITION_NUM: raise Exception("Error number of partition, it should between %d and %d" % (0, self.MAX_PARTITION_NUM)) session.init(mode=self.parameters['work_mode']) data_table_count = self.save_data_table(table_name, namespace, head, self.parameters.get('in_version', False)) LOGGER.info("------------load data finish!-----------------") LOGGER.info("file: {}".format(self.parameters["file"])) LOGGER.info("total data_count: {}".format(data_table_count)) LOGGER.info("table name: {}, table namespace: {}".format(table_name, namespace))
def run():
    parser = argparse.ArgumentParser()
    parser.add_argument('-j', '--job_id', required=True, type=str, help="job id")
    parser.add_argument('-w', '--work_mode', required=True, type=str, help="work mode")
    parser.add_argument('-b', '--backend', required=True, type=str, help="backend")
    args = parser.parse_args()
    job_id = args.job_id
    work_mode = int(args.work_mode)
    backend = int(args.backend)
    session.init(job_id=job_id, mode=work_mode, backend=backend, set_log_dir=False)
    try:
        schedule_logger(job_id.split('_')[0]).info(
            'start stop session {}'.format(session.get_session_id()))
        session.stop()
        schedule_logger(job_id.split('_')[0]).info(
            'stop session {} success'.format(session.get_session_id()))
    except Exception:
        pass
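# Hypothetical command-line invocation of the entry point above; the script
# name "session_stop.py" is an assumption for illustration, not taken from the
# source. work_mode and backend are parsed as strings and cast to int, so plain
# integers are expected on the command line:
#     python session_stop.py -j 202001010000000000_0 -w 1 -b 0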
def init_table_manager_and_federation(cls, job_id, role, num_hosts, host_ind=0):
    from arch.api import session
    from arch.api import federation
    role_id = {
        "host": [10000 + i for i in range(num_hosts)],
        "guest": [9999],
        "arbiter": [9999]
    }
    session.init(job_id)
    federation.init(job_id,
                    {
                        "local": {
                            "role": role,
                            "party_id": role_id[role][0] if role != "host" else role_id[role][host_ind]
                        },
                        "role": role_id
                    })
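# Hypothetical call to the classmethod above; the owning class name "TestBase"
# is an assumption for illustration. host_ind=1 picks the second host party id
# out of the role_id map built inside the helper.
#     TestBase.init_table_manager_and_federation(job_id=str(uuid.uuid1()),
#                                                role="host", num_hosts=2, host_ind=1)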
def run(self, component_parameters=None, args=None): self.parameters = component_parameters["DownloadParam"] self.parameters["role"] = component_parameters["role"] self.parameters["local"] = component_parameters["local"] table_name, namespace = dtable_utils.get_table_info( config=self.parameters, create=False) job_id = "_".join(self.taskid.split("_")[:2]) session.init(job_id, self.parameters["work_mode"]) with open(os.path.abspath(self.parameters["output_path"]), "w") as fout: data_table = session.get_data_table(name=table_name, namespace=namespace) LOGGER.info('===== begin to export data =====') lines = 0 for key, value in data_table.collect(): if not value: fout.write(key + "\n") else: fout.write(key + self.parameters.get("delimitor", ",") + str(value) + "\n") lines += 1 if lines % 2000 == 0: LOGGER.info("===== export {} lines =====".format(lines)) LOGGER.info("===== export {} lines totally =====".format(lines)) LOGGER.info('===== export data finish =====') LOGGER.info('===== export data file path:{} ====='.format( os.path.abspath(self.parameters["output_path"])))
def do_export_file(job_id, _data):
    try:
        work_mode = _data.get("work_mode")
        name = _data.get("table_name")
        namespace = _data.get("namespace")
        delimitor = _data.get("delimitor", ",")
        output_path = _data.get("output_path")
        # todo: use eggroll as default storage backend
        session.init(job_id=job_id, mode=work_mode, backend=Backend.EGGROLL)
        with open(os.path.abspath(output_path), "w") as fout:
            data_table = session.get_data_table(name=name, namespace=namespace)
            print('===== begin to export data =====')
            lines = 0
            for key, value in data_table.collect():
                if not value:
                    fout.write(key + "\n")
                else:
                    fout.write(key + delimitor + str(value) + "\n")
                lines += 1
                if lines % 2000 == 0:
                    print("===== export {} lines =====".format(lines))
            print("===== export {} lines totally =====".format(lines))
            print('===== export data finish =====')
    except Exception:
        raise ValueError("cannot export data, please check json file")
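# A minimal sketch of calling do_export_file; the dict keys mirror the ones the
# function reads above, while the concrete table name, namespace and output
# path are illustrative values, not from the source.
do_export_file(
    job_id="export_job_demo",
    _data={
        "work_mode": 0,
        "table_name": "demo_table",
        "namespace": "demo_namespace",
        "delimitor": ",",
        "output_path": "/tmp/demo_export.csv",
    })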
def test_plain_lr():
    from sklearn.datasets import make_moons
    import functools
    # Change flow_id, otherwise the in-memory table may be overwritten
    session.init(mode=0)
    ns = str(uuid.uuid1())
    X = session.table('testX7', ns, partition=2)
    Y = session.table('testY7', ns, partition=2)
    b = np.array([0])
    eta = 1.2
    max_iter = 10
    total_num = 500
    _x, _y = make_moons(total_num, noise=0.25, random_state=12345)
    for i in range(np.shape(_y)[0]):
        X.put(i, _x[i])
        Y.put(i, _y[i])
    print(len([y for y in Y.collect()]))

    current_milli_time = lambda: int(round(time.time() * 1000))
    start = current_milli_time()

    # shape_w = [1, np.shape(_x)[1]]
    shape_w = [np.shape(_x)[1]]
    w = np.ones(shape_w)
    print(w)
    X = TensorInEgg(None, None, X)
    Y = TensorInEgg(None, None, Y)
    w = TensorInPy(None, None, w)
    b = TensorInPy(None, None, b)

    # lr = LR(shape_w)
    # lr.train(X, Y)
    itr = 0
    while itr < max_iter:
        H = 1.0 / (1 + ((X @ w + b) * -1).map(np.exp))
        R = H - Y
        gradient_w = (R * X).sum() / total_num
        gradient_b = R.sum() / total_num
        w = w - eta * gradient_w
        b = b - eta * gradient_b
        print("iteration {}: w={}, b={}".format(itr, w, b))
        # self.plot(itr)
        itr += 1
    print("train total time: {}".format(current_milli_time() - start))

    _x_test, _y_test = make_moons(50, random_state=12345)
    _x_test = TensorInPy(None, None, _x_test)
    y_pred = 1.0 / (1 + ((_x_test @ w + b) * -1).map(np.exp))
    from sklearn import metrics
    auc = metrics.roc_auc_score(_y_test, y_pred.store.reshape(50))
    print("auc: {}".format(auc))
def setUp(self): session.init("test_fair_loss") self.log_cosh_loss = LogCoshLoss() self.y_list = [i % 2 for i in range(100)] self.predict_list = [random.random() for i in range(100)] self.y = session.parallelize(self.y_list, include_key=False) self.predict = session.parallelize(self.predict_list, include_key=False)
def setUp(self): session.init("test_least_abs_error_loss") self.lae_loss = LeastAbsoluteErrorLoss() self.y_list = [i % 2 for i in range(100)] self.predict_list = [random.random() for i in range(100)] self.y = session.parallelize(self.y_list, include_key=False) self.predict = session.parallelize(self.predict_list, include_key=False)
def setUp(self):
    self.feature_histogram = FeatureHistogram()
    session.init("test_feature_histogram")
    data_insts = []
    for i in range(1000):
        indices = []
        data = []
        for j in range(10):
            x = random.randint(0, 5)
            if x != 0:
                data.append(x)
                indices.append(j)
        sparse_vec = SparseVector(indices, data, shape=10)
        data_insts.append((Instance(features=sparse_vec), (1, random.randint(0, 3))))
    self.node_map = {0: 0, 1: 1, 2: 2, 3: 3}
    self.data_insts = data_insts
    self.data_bin = session.parallelize(data_insts, include_key=False, partition=16)
    self.grad_and_hess_list = [(random.random(), random.random()) for i in range(1000)]
    self.grad_and_hess = session.parallelize(self.grad_and_hess_list, include_key=False, partition=16)
    bin_split_points = []
    for i in range(10):
        bin_split_points.append(np.array([i for i in range(5)]))
    self.bin_split_points = np.array(bin_split_points)
    self.bin_sparse = [0 for i in range(10)]
def setUp(self): session.init("test_encrypt_mode_calculator") self.list_data = [] self.tuple_data = [] self.numpy_data = [] for i in range(30): list_value = [100 * i + j for j in range(20)] tuple_value = tuple(list_value) numpy_value = np.array(list_value, dtype="int") self.list_data.append(list_value) self.tuple_data.append(tuple_value) self.numpy_data.append(numpy_value) self.data_list = session.parallelize(self.list_data, include_key=False, partition=10) self.data_tuple = session.parallelize(self.tuple_data, include_key=False, partition=10) self.data_numpy = session.parallelize(self.numpy_data, include_key=False, partition=10)
def setUp(self): session.init("test_cross_entropy") self.softmax_loss = SoftmaxCrossEntropyLoss() self.y_list = [i % 5 for i in range(100)] self.predict_list = [np.array([random.random() for i in range(5)]) for j in range(100)] self.y = session.parallelize(self.y_list, include_key=False, partition=16) self.predict = session.parallelize(self.predict_list, include_key=False, partition=16)
def setUp(self): session.init("test_huber_loss") self.delta = 1 self.huber_loss = HuberLoss(self.delta) self.y_list = [i % 2 for i in range(100)] self.predict_list = [random.random() for i in range(100)] self.y = session.parallelize(self.y_list, include_key=False) self.predict = session.parallelize(self.predict_list, include_key=False)
def setUp(self): session.init("test_fair_loss") self.rho = 0.5 self.tweedie_loss = TweedieLoss(self.rho) self.y_list = [i % 2 for i in range(100)] self.predict_list = [random.random() for i in range(100)] self.y = session.parallelize(self.y_list, include_key=False) self.predict = session.parallelize(self.predict_list, include_key=False)
def setUp(self):
    self.jobid = str(uuid.uuid1())
    session.init(self.jobid)
    from federatedml.statistic.intersect.intersect_host import RsaIntersectionHost
    from federatedml.statistic.intersect.intersect_host import RawIntersectionHost
    intersect_param = IntersectParam()
    self.rsa_operator = RsaIntersectionHost(intersect_param)
    self.raw_operator = RawIntersectionHost(intersect_param)
def setUp(self): session.init("test_label_checker") self.small_label_set = [Instance(label=i % 5) for i in range(100)] self.classify_inst = session.parallelize(self.small_label_set, include_key=False) self.regression_label = [Instance(label=random.random()) for i in range(100)] self.regression_inst = session.parallelize(self.regression_label) self.classify_checker = ClassifyLabelChecker() self.regression_checker = RegressionLabelChecker()
def _init_argument(self):
    with open(config_path) as conf_f:
        runtime_json = json.load(conf_f)
    self._initialize(runtime_json)
    LOGGER.debug("The Arbiter job id is {}".format(job_id))
    LOGGER.debug("The Arbiter work mode id is {}".format(self.workflow_param.work_mode))
    session.init(job_id, self.workflow_param.work_mode)
    federation.init(job_id, runtime_json)
    LOGGER.debug("Finish eggroll and federation init")
def clean_tables(self):
    from arch.api import session
    session.init(job_id=self.job_id)
    try:
        session.cleanup("*", self.job_id, True)
    except EnvironmentError:
        pass
    try:
        session.cleanup("*", self.job_id, False)
    except EnvironmentError:
        pass
def setUp(self): session.init("test_stratified_sampler") self.data = [] self.data_to_trans = [] for i in range(1000): self.data.append((i, Instance(label=i % 4, features=i * i))) self.data_to_trans.append((i, Instance(features=i**3))) self.table = session.parallelize(self.data, include_key=True) self.table_trans = session.parallelize(self.data_to_trans, include_key=True)
def init_session_and_federation(job_id, role, partyid, partyid_map):
    from arch.api import session
    from arch.api import federation
    session.init(job_id)
    federation.init(job_id=job_id,
                    runtime_conf={
                        "local": {
                            "role": role,
                            "party_id": partyid
                        },
                        "role": partyid_map
                    })
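# Hypothetical invocation of the helper above; the party ids and role map are
# illustrative values shaped like the other snippets in this file.
init_session_and_federation(
    job_id=str(uuid.uuid1()),
    role="guest",
    partyid=9999,
    partyid_map={"guest": [9999], "host": [10000], "arbiter": [9999]})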
def setUp(self): session.init("123") self.data_num = 1000 self.feature_num = 200 final_result = [] for i in range(self.data_num): tmp = i * np.ones(self.feature_num) inst = Instance(inst_id=i, features=tmp, label=0) tmp = (str(i), inst) final_result.append(tmp) table = session.parallelize(final_result, include_key=True, partition=3) self.table = table
def setUp(self):
    self.job_id = str(uuid.uuid1())
    session.init(self.job_id)
    data_num = 100
    feature_num = 8
    self.prepare_data(data_num, feature_num)
    local_baseline_obj = LocalBaseline()
    local_baseline_obj.need_run = True
    local_baseline_obj.header = ["x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8"]
    local_baseline_obj.model_name = "LogisticRegression"
    local_baseline_obj.model_opts = {}
    self.local_baseline_obj = local_baseline_obj
def init_session_for_flow_server():
    # Options are used with different backend on demand
    session.init(job_id="session_used_by_fate_flow_server_{}".format(fate_uuid()),
                 mode=RuntimeConfig.WORK_MODE,
                 backend=RuntimeConfig.BACKEND,
                 options={"eggroll.session.processors.per.node": 1})
    # init session detect table
    detect_table = session.table(namespace=DETECT_TABLE[0], name=DETECT_TABLE[1], partition=DETECT_TABLE[2])
    detect_table.destroy()
    detect_table = session.table(namespace=DETECT_TABLE[0], name=DETECT_TABLE[1], partition=DETECT_TABLE[2])
    detect_table.put_all(enumerate(range(DETECT_TABLE[2])))
    stat_logger.info("init detect table {} {} for session {}".format(detect_table.get_namespace(),
                                                                     detect_table.get_name(),
                                                                     session.get_session_id()))
    stat_logger.info("init session {} for fate flow server successfully".format(session.get_session_id()))
def session_init(job_id, idx):
    from arch.api import session
    from arch.api import federation
    role = "guest" if idx < 1 else "host"
    party_id = 9999 + idx if idx < 1 else 10000 + (idx - 1)
    role_parties = {
        "host": [10000 + i for i in range(NUM_HOSTS)],
        "guest": [9999 + i for i in range(1)]
    }
    session.init(job_id)
    federation.init(job_id,
                    dict(local=dict(role=role, party_id=party_id),
                         role=role_parties))
    return federation.local_party(), federation.all_parties()
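# Illustrative use of session_init above: idx 0 plays guest, idx >= 1 plays the
# (idx - 1)-th host. NUM_HOSTS is assumed to be defined at module level, and
# the returned values are the local party and the full party list.
local_party, all_parties = session_init(job_id=str(uuid.uuid1()), idx=0)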
def setUp(self):
    self.job_id = str(uuid.uuid1())
    session.init(self.job_id)
    model = HeteroStepwise()
    model.__setattr__('role', consts.GUEST)
    model.__setattr__('fit_intercept', True)
    self.model = model
    data_num = 100
    feature_num = 5
    bool_list = [True, False, True, True, False]
    self.str_mask = "10110"
    self.header = ["x1", "x2", "x3", "x4", "x5"]
    self.mask = self.prepare_mask(bool_list)
    self.table = self.prepare_data(data_num, feature_num, self.header, "id", "y")
def init(job_id, runtime_conf, mode, server_conf_path="arch/conf/server_conf.json"):
    session.init(job_id, mode)
    print("runtime_conf:{}".format(runtime_conf))
    all_checker = AllChecker(runtime_conf)
    all_checker.check_all()
    with open(runtime_conf) as conf_p:
        runtime_json = json.load(conf_p)
    if mode is None:
        raise EnvironmentError("eggroll should be initialized before fate_script")
    if mode == WorkMode.STANDALONE:
        RuntimeInstance.FEDERATION = standalone_fate_script.init(job_id=job_id,
                                                                 runtime_conf=runtime_json)
    else:
        RuntimeInstance.FEDERATION = cluster_fate_script.init(job_id=job_id,
                                                              runtime_conf=runtime_json,
                                                              server_conf_path=server_conf_path)