def main(config="../../config.yaml", param="./gbdt_config_reg.yaml"):
    # obtain config
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)
    data_guest = param["data_guest"]
    data_host = param["data_host"]
    idx = param["idx"]
    label_name = param["label_name"]
    print('config is {}'.format(config))
    if isinstance(config, str):
        config = JobConfig.load_from_file(config)
        data_base_dir = config["data_base_dir"]
        print('data base dir is', data_base_dir)
    else:
        data_base_dir = config.data_base_dir

    # prepare data
    df_guest = pd.read_csv(os.path.join(data_base_dir, data_guest), index_col=idx)
    df_host = pd.read_csv(os.path.join(data_base_dir, data_host), index_col=idx)
    df = df_guest.join(df_host, rsuffix='host')
    y = df[label_name]
    X = df.drop(label_name, axis=1)

    clf = GradientBoostingRegressor(random_state=0, n_estimators=50)
    clf.fit(X, y)
    y_predict = clf.predict(X)
    result = {"mean_absolute_error": mean_absolute_error(y, y_predict)}
    print(result)
    return {}, result
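# Every script in this section loads its YAML settings through
# JobConfig.load_from_file. The real helper ships with FATE's test utilities;
# a minimal stand-in, assuming plain YAML files, might look like this sketch:
import yaml


class JobConfig:
    @staticmethod
    def load_from_file(path):
        # parse the YAML file into a plain dict, matching the param["..."] access above
        with open(path, "r") as f:
            return yaml.safe_load(f)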
def main(config="../../config.yaml", param="./lr_config.yaml"):
    # obtain config
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)
    assert isinstance(param, dict)
    data_guest = param["data_guest"]
    data_host = param["data_host"]
    idx = param["idx"]
    label_name = param["label_name"]
    if isinstance(config, str):
        config = JobConfig.load_from_file(config)
        data_base_dir = config["data_base_dir"]
    else:
        data_base_dir = config.data_base_dir

    config_param = {
        "penalty": param["penalty"],
        "max_iter": 100,
        "alpha": param["alpha"],
        "learning_rate": "optimal",
        "eta0": param["learning_rate"]
    }

    # prepare data
    df_guest = pandas.read_csv(os.path.join(data_base_dir, data_guest), index_col=idx)
    df_host = pandas.read_csv(os.path.join(data_base_dir, data_host), index_col=idx)
    # df_test = pandas.read_csv(data_test, index_col=idx)
    df = pandas.concat([df_guest, df_host], axis=0)
    # df = df_guest.join(df_host, rsuffix="host")
    y_train = df[label_name]
    x_train = df.drop(label_name, axis=1)
    # y_test = df_test[label_name]
    # x_test = df_test.drop(label_name, axis=1)
    x_test, y_test = x_train, y_train

    # lm = LogisticRegression(max_iter=20)
    lm = SGDClassifier(loss="log", **config_param)
    lm_fit = lm.fit(x_train, y_train)
    y_pred = lm_fit.predict(x_test)
    acc = accuracy_score(y_test, y_pred)
    result = {"accuracy": acc}
    print('multi result', result)
    return {}, result
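# Two data layouts recur throughout this section: homo scripts stack party rows
# with concat(axis=0) (same features, more samples), while hetero scripts join
# party columns on a shared index (same samples, more features). A toy contrast:
import pandas as pd

left = pd.DataFrame({"x": [1, 2]}, index=[0, 1])
right_rows = pd.DataFrame({"x": [3, 4]}, index=[2, 3])
right_cols = pd.DataFrame({"y": [5, 6]}, index=[0, 1])

homo = pd.concat([left, right_rows], axis=0)    # 4 rows, 1 feature
hetero = left.join(right_cols, rsuffix="host")  # 2 rows, 2 features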
def main(config="../../config.yaml", param="./hetero_nn_breast_config.yaml"):
    if isinstance(config, str):
        config = JobConfig.load_from_file(config)
        data_base_dir = config["data_base_dir"]
    else:
        data_base_dir = config.data_base_dir
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)
    data_guest = param["data_guest"]
    data_host = param["data_host"]
    idx = param["idx"]
    label_name = param["label_name"]

    # prepare data
    Xb = pandas.read_csv(os.path.join(data_base_dir, data_guest), index_col=idx)
    Xa = pandas.read_csv(os.path.join(data_base_dir, data_host), index_col=idx)
    y = Xb[label_name]
    if param["loss"] == "categorical_crossentropy":
        labels = y.copy()
        label_encoder = LabelEncoder()
        y = label_encoder.fit_transform(y)
        y = to_categorical(y)
    Xb = Xb.drop(label_name, axis=1)

    model = build(param, Xb.shape[1], Xa.shape[1])
    model.fit([Xb, Xa], y,
              epochs=param["epochs"],
              verbose=0,
              batch_size=param["batch_size"],
              shuffle=True)

    eval_result = {}
    for metric in param["metrics"]:
        if metric.lower() == "auc":
            predict_y = model.predict([Xb, Xa])
            auc = metrics.roc_auc_score(y, predict_y)
            eval_result["auc"] = auc
        elif metric == "accuracy":
            predict_y = np.argmax(model.predict([Xb, Xa]), axis=1)
            predict_y = label_encoder.inverse_transform(predict_y)
            acc = metrics.accuracy_score(y_true=labels, y_pred=predict_y)
            eval_result["accuracy"] = acc
    print(eval_result)
    data_summary = {}
    return data_summary, eval_result
def main(config="../../config.yaml", param="param_conf.yaml"):
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)
    if isinstance(config, str):
        config = JobConfig.load_from_file(config)
        data_base_dir = config["data_base_dir"]
    else:
        data_base_dir = config.data_base_dir

    epoch = param["epoch"]
    lr = param["lr"]
    batch_size = param.get("batch_size", -1)
    optimizer_name = param.get("optimizer", "Adam")
    loss = param.get("loss", "categorical_crossentropy")
    metrics = param.get("metrics", ["accuracy"])
    layers = param["layers"]
    is_multy = param["is_multy"]
    data = dataset[param.get("dataset", "vehicle")]

    model = Sequential()
    for layer_config in layers:
        layer = getattr(tensorflow.keras.layers, layer_config["name"])
        layer_params = layer_config["params"]
        model.add(layer(**layer_params))
    model.compile(
        optimizer=getattr(optimizers, optimizer_name)(learning_rate=lr),
        loss=loss,
        metrics=metrics,
    )

    data_path = pathlib.Path(data_base_dir)
    data_with_label = pandas.concat([
        pandas.read_csv(data_path.joinpath(data["guest"]), index_col=0),
        pandas.read_csv(data_path.joinpath(data["host"]), index_col=0),
    ]).values
    data = data_with_label[:, 1:]
    if is_multy:
        labels = to_categorical(data_with_label[:, 0])
    else:
        labels = data_with_label[:, 0]
    if batch_size < 0:
        batch_size = len(data_with_label)

    model.fit(data, labels, epochs=epoch, batch_size=batch_size)
    evaluate = model.evaluate(data, labels)
    metric_summary = {"accuracy": evaluate[1]}
    data_summary = {}
    return data_summary, metric_summary
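# The Keras baseline above indexes a module-level `dataset` mapping to locate the
# guest and host CSVs under data_base_dir. The mapping itself is not shown in this
# section; a plausible sketch (the file names are assumptions) is:
dataset = {
    "vehicle": {
        "guest": "vehicle_scale_homo_guest.csv",  # assumed file name
        "host": "vehicle_scale_homo_host.csv",    # assumed file name
    },
}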
def main(param=""):
    # obtain config
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)
    data_guest = param["data_guest"]
    data_host = param["data_host"]
    data_test = param["data_test"]
    idx = param["idx"]
    label_name = param["label_name"]

    # prepare data
    df_guest = pd.read_csv(data_guest, index_col=idx)
    df_host = pd.read_csv(data_host, index_col=idx)
    df_test = pd.read_csv(data_test, index_col=idx)
    df = pd.concat([df_guest, df_host], axis=0)
    y = df[label_name]
    X = df.drop(label_name, axis=1)
    X_guest = df_guest.drop(label_name, axis=1)
    y_guest = df_guest[label_name]

    clf = GradientBoostingClassifier(
        n_estimators=50,
        learning_rate=0.3,
    )
    clf.fit(X, y)
    y_pred = clf.predict(X_guest)
    acc = accuracy_score(y_guest, y_pred)
    result = {"accuracy": acc}
    print(result)
    return {}, result
def main(param="./linr_config.yaml"):
    # obtain config
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)
    data_guest = param["data_guest"]
    data_host = param["data_host"]
    idx = param["idx"]
    label_name = param["label_name"]

    # prepare data
    df_guest = pandas.read_csv(data_guest, index_col=idx)
    df_host = pandas.read_csv(data_host, index_col=idx)
    df = df_guest.join(df_host, rsuffix="host")
    y = df[label_name]
    X = df.drop(label_name, axis=1)

    lm = SGDRegressor(loss="squared_loss",
                      penalty=param["penalty"],
                      fit_intercept=True,
                      max_iter=param["max_iter"],
                      average=param["batch_size"])
    lm_fit = lm.fit(X, y)
    y_pred = lm_fit.predict(X)

    mse = mean_squared_error(y, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y, y_pred)
    explained_var = explained_variance_score(y, y_pred)
    metric_summary = {
        "r2_score": r2,
        "mean_squared_error": mse,
        "root_mean_squared_error": rmse,
        "explained_variance": explained_var
    }
    data_summary = {}
    return data_summary, metric_summary
def main(param=""):
    # obtain config
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)
    data_guest = param["data_guest"]
    data_host = param["data_host"]
    idx = param["idx"]
    label_name = param["label_name"]

    # prepare data
    df_guest = pd.read_csv(data_guest, index_col=idx)
    df_host = pd.read_csv(data_host, index_col=idx)
    df = df_guest.join(df_host, rsuffix='host')
    y = df[label_name]
    X = df.drop(label_name, axis=1)

    clf = GradientBoostingClassifier(
        random_state=0,
        n_estimators=120 if 'epsilon' in data_guest else 50,
        learning_rate=0.1)
    clf.fit(X, y)
    # note: AUC is computed on hard label predictions here, not probabilities
    y_prob = clf.predict(X)
    try:
        auc_score = roc_auc_score(y, y_prob)
    except Exception:
        print("no auc score available")
        return
    result = {"auc": auc_score}
    print(result)
    return {}, result
def main(param=""):
    # obtain config
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)
    data_guest = param["data_guest"]
    data_host = param["data_host"]
    data_test = param["data_test"]
    idx = param["idx"]
    label_name = param["label_name"]

    # prepare data
    df_guest = pd.read_csv(data_guest, index_col=idx)
    df_host = pd.read_csv(data_host, index_col=idx)
    df_test = pd.read_csv(data_test, index_col=idx)
    df = pd.concat([df_guest, df_host], axis=0)
    y = df[label_name]
    X = df.drop(label_name, axis=1)
    X_guest = df_guest.drop(label_name, axis=1)
    y_guest = df_guest[label_name]

    clf = GradientBoostingRegressor(n_estimators=50)
    clf.fit(X, y)
    y_predict = clf.predict(X_guest)
    result = {
        "mean_squared_error": mean_squared_error(y_guest, y_predict),
        "mean_absolute_error": mean_absolute_error(y_guest, y_predict)
    }
    print(result)
    return {}, result
def main(param=""):
    # obtain config
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)
    data_guest = param["data_guest"]
    data_host = param["data_host"]
    idx = param["idx"]
    label_name = param["label_name"]

    # prepare data
    df_guest = pd.read_csv(data_guest, index_col=idx)
    df_host = pd.read_csv(data_host, index_col=idx)
    df = df_guest.join(df_host, rsuffix='host')
    y = df[label_name]
    X = df.drop(label_name, axis=1)

    clf = GradientBoostingRegressor(random_state=0, n_estimators=50, learning_rate=0.1)
    clf.fit(X, y)
    y_predict = clf.predict(X)
    result = {
        "mean_absolute_error": mean_absolute_error(y, y_predict),
    }
    print(result)
    return {}, result
def main(config="../../config.yaml", param="./vechile_config.yaml"):
    # obtain config
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)
    assert isinstance(param, dict)
    data_guest = param["data_guest"]
    data_host = param["data_host"]
    idx = param["idx"]
    label_name = param["label_name"]
    if isinstance(config, str):
        config = JobConfig.load_from_file(config)
        data_base_dir = config["data_base_dir"]
    else:
        data_base_dir = config.data_base_dir

    config_param = {
        "penalty": param["penalty"],
        "max_iter": param["max_iter"],
        "alpha": param["alpha"],
        "learning_rate": "optimal",
        "eta0": param["learning_rate"],
        "random_state": 105
    }

    # prepare data
    df_guest = pandas.read_csv(os.path.join(data_base_dir, data_guest), index_col=idx)
    df_host = pandas.read_csv(os.path.join(data_base_dir, data_host), index_col=idx)
    df = df_guest.join(df_host, rsuffix="host")
    y = df[label_name]
    X = df.drop(label_name, axis=1)

    # lm = LogisticRegression(max_iter=20)
    lm = SGDClassifier(loss="log", **config_param, shuffle=False)
    lm_fit = lm.fit(X, y)
    y_pred = lm_fit.predict(X)
    recall = recall_score(y, y_pred, average="macro")
    pr = precision_score(y, y_pred, average="macro")
    acc = accuracy_score(y, y_pred)
    result = {"accuracy": acc}
    print(result)
    return {}, result
def main(config="../../config.yaml", param="./gbdt_config_binary.yaml"):
    # obtain config
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)
    data_guest = param["data_guest"]
    data_host = param["data_host"]
    idx = param["idx"]
    label_name = param["label_name"]
    print('config is {}'.format(config))
    if isinstance(config, str):
        config = JobConfig.load_from_file(config)
        data_base_dir = config["data_base_dir"]
        print('data base dir is', data_base_dir)
    else:
        data_base_dir = config.data_base_dir

    # prepare data
    df_guest = pd.read_csv(os.path.join(data_base_dir, data_guest), index_col=idx)
    df_host = pd.read_csv(os.path.join(data_base_dir, data_host), index_col=idx)
    df = pd.concat([df_guest, df_host], axis=0)
    y = df[label_name]
    X = df.drop(label_name, axis=1)
    X_guest = df_guest.drop(label_name, axis=1)
    y_guest = df_guest[label_name]

    clf = GradientBoostingClassifier(
        n_estimators=120 if 'epsilon' in data_guest else 50,
        learning_rate=0.1)
    clf.fit(X, y)
    # note: AUC is computed on hard label predictions here, not probabilities
    y_prob = clf.predict(X_guest)
    try:
        auc_score = roc_auc_score(y_guest, y_prob)
    except Exception:
        print("no auc score available")
        return
    result = {"auc": auc_score}

    import time
    print(result)
    print(data_guest)
    time.sleep(3)
    return {}, result
def main(config="../../config.yaml", param="./gbdt_config_multi.yaml"):
    # obtain config
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)
    data_guest = param["data_guest"]
    data_host = param["data_host"]
    idx = param["idx"]
    label_name = param["label_name"]
    print('config is {}'.format(config))
    if isinstance(config, str):
        config = JobConfig.load_from_file(config)
        data_base_dir = config["data_base_dir"]
        print('data base dir is', data_base_dir)
    else:
        data_base_dir = config.data_base_dir

    # prepare data
    df_guest = pd.read_csv(os.path.join(data_base_dir, data_guest), index_col=idx)
    df_host = pd.read_csv(os.path.join(data_base_dir, data_host), index_col=idx)
    df = df_guest.join(df_host, rsuffix='host')
    y = df[label_name]
    X = df.drop(label_name, axis=1)

    clf = GradientBoostingClassifier(random_state=0, n_estimators=50, learning_rate=0.3)
    clf.fit(X, y)
    y_pred = clf.predict(X)
    # AUC on hard multi-class predictions typically fails; fall through to accuracy
    try:
        auc_score = roc_auc_score(y, y_pred)
    except Exception:
        print("no auc score available")
    acc = accuracy_score(y, y_pred)
    result = {"accuracy": acc}
    print('multi result', result)
    return {}, result
def main(config="../../config.yaml", param="./gbdt_config_multi.yaml"):
    # obtain config
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)
    data_guest = param["data_guest"]
    data_host = param["data_host"]
    idx = param["idx"]
    label_name = param["label_name"]
    print('config is {}'.format(config))
    if isinstance(config, str):
        config = JobConfig.load_from_file(config)
        data_base_dir = config["data_base_dir"]
        print('data base dir is', data_base_dir)
    else:
        data_base_dir = config.data_base_dir

    # prepare data
    df_guest = pd.read_csv(os.path.join(data_base_dir, data_guest), index_col=idx)
    df_host = pd.read_csv(os.path.join(data_base_dir, data_host), index_col=idx)
    df = pd.concat([df_guest, df_host], axis=0)
    y = df[label_name]
    X = df.drop(label_name, axis=1)
    X_guest = df_guest.drop(label_name, axis=1)
    y_guest = df_guest[label_name]

    clf = GradientBoostingClassifier(
        n_estimators=50,
        learning_rate=0.3,
    )
    clf.fit(X, y)
    y_pred = clf.predict(X_guest)
    acc = accuracy_score(y_guest, y_pred)
    result = {"accuracy": acc}
    print(result)
    return {}, result
def main(param):
    # obtain config
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)
    assert isinstance(param, dict)
    data_guest = param["data_guest"]
    data_host = param["data_host"]
    idx = param["idx"]
    label_name = param["label_name"]

    config_param = {
        "penalty": param["penalty"],
        "max_iter": 100,
        "alpha": param["alpha"],
        "learning_rate": "optimal",
        "eta0": param["learning_rate"]
    }

    # prepare data
    df_guest = pandas.read_csv(data_guest, index_col=idx)
    df_host = pandas.read_csv(data_host, index_col=idx)
    df = df_guest.join(df_host, rsuffix="host")
    y = df[label_name]
    X = df.drop(label_name, axis=1)
    # x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)
    x_train, x_test, y_train, y_test = X, X, y, y

    # lm = LogisticRegression(max_iter=20)
    lm = SGDClassifier(loss="log", **config_param)
    lm_fit = lm.fit(x_train, y_train)
    y_pred = lm_fit.predict(x_test)
    y_prob = lm_fit.predict_proba(x_test)[:, 1]
    try:
        auc_score = roc_auc_score(y_test, y_prob)
    except Exception:
        print("no auc score available")
        return
    recall = recall_score(y_test, y_pred, average="macro")
    pr = precision_score(y_test, y_pred, average="macro")
    acc = accuracy_score(y_test, y_pred)
    # y_predict_proba = est.predict_proba(X_test)[:, 1]
    fpr, tpr, thresholds = roc_curve(y_test, y_prob)
    ks = max(tpr - fpr)
    result = {"auc": auc_score, "recall": recall, "precision": pr, "accuracy": acc, "ks": ks}
    print(result)
    print(f"coef_: {lm_fit.coef_}, intercept_: {lm_fit.intercept_}, n_iter: {lm_fit.n_iter_}")
    return {}, result
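# The KS statistic above falls out of the ROC curve as max(TPR - FPR). A
# self-contained check on toy data, using only sklearn:
import numpy as np
from sklearn.metrics import roc_curve

y_true = np.array([0, 0, 1, 1])
y_score = np.array([0.1, 0.4, 0.35, 0.8])
fpr, tpr, _ = roc_curve(y_true, y_score)
ks = max(tpr - fpr)  # 0.5 on this toy example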
def main(param=""):
    # obtain config
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)
    data_guest = param["data_guest"]
    data_host = param["data_host"]
    data_test = param["data_test"]
    idx = param["idx"]
    label_name = param["label_name"]

    # prepare data
    df_guest = pd.read_csv(data_guest, index_col=idx)
    df_host = pd.read_csv(data_host, index_col=idx)
    df = pd.concat([df_guest, df_host], axis=0)
    y = df[label_name]
    X = df.drop(label_name, axis=1)
    X_guest = df_guest.drop(label_name, axis=1)
    y_guest = df_guest[label_name]

    clf = GradientBoostingClassifier(
        n_estimators=120 if 'epsilon' in data_guest else 50,
        learning_rate=0.1)
    clf.fit(X, y)
    # note: AUC is computed on hard label predictions here, not probabilities
    y_prob = clf.predict(X_guest)
    try:
        auc_score = roc_auc_score(y_guest, y_prob)
    except Exception:
        print("no auc score available")
        return
    result = {"auc": auc_score}

    import time
    print(result)
    print(data_guest)
    time.sleep(3)
    return {}, result
def main(config="../../config.yaml", param="./lr_config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]
    backend = config.backend
    work_mode = config.work_mode

    if isinstance(param, str):
        param = JobConfig.load_from_file(param)
    assert isinstance(param, dict)

    data_set = param.get("data_guest").split('/')[-1]
    if data_set == "default_credit_hetero_guest.csv":
        guest_data_table = 'default_credit_hetero_guest'
        host_data_table = 'default_credit_hetero_host'
    elif data_set == 'breast_hetero_guest.csv':
        guest_data_table = 'breast_hetero_guest'
        host_data_table = 'breast_hetero_host'
    elif data_set == 'give_credit_hetero_guest.csv':
        guest_data_table = 'give_credit_hetero_guest'
        host_data_table = 'give_credit_hetero_host'
    elif data_set == 'epsilon_5k_hetero_guest.csv':
        guest_data_table = 'epsilon_5k_hetero_guest'
        host_data_table = 'epsilon_5k_hetero_host'
    else:
        raise ValueError(f"Cannot recognize data_set: {data_set}")

    guest_train_data = {
        "name": guest_data_table,
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": host_data_table,
        "namespace": f"experiment{namespace}"
    }

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    # configure Reader for guest
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    # configure Reader for host
    reader_0.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)

    # define DataIO components
    dataio_0 = DataIO(name="dataio_0")  # start component numbering at 0
    # get DataIO party instance of guest
    dataio_0_guest_party_instance = dataio_0.get_party_instance(role='guest', party_id=guest)
    # configure DataIO for guest
    dataio_0_guest_party_instance.component_param(with_label=True, output_format="dense")
    # get and configure DataIO party instance of host
    dataio_0.get_party_instance(
        role='host', party_id=host).component_param(with_label=False)

    # define Intersection component
    intersection_0 = Intersection(name="intersection_0")

    lr_param = {
        "validation_freqs": None,
        "early_stopping_rounds": None,
    }
    config_param = {
        "penalty": param["penalty"],
        "max_iter": param["max_iter"],
        "alpha": param["alpha"],
        "learning_rate": param["learning_rate"],
        "optimizer": param["optimizer"],
        "batch_size": param["batch_size"],
        "early_stop": "diff",
        "tol": 1e-5,
        "init_param": {
            "init_method": param.get("init_method", 'random_uniform')
        }
    }
    lr_param.update(config_param)
    print(f"lr_param: {lr_param}, data_set: {data_set}")

    hetero_lr_0 = HeteroLR(name='hetero_lr_0', **lr_param)
    evaluation_0 = Evaluation(name='evaluation_0', eval_type="binary")

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(hetero_lr_0, data=Data(train_data=intersection_0.output.data))
    pipeline.add_component(evaluation_0, data=Data(data=hetero_lr_0.output.data))

    # compile pipeline once finished adding modules, this step will form conf and dsl files for running job
    pipeline.compile()

    # fit model
    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)

    # query component summary
    print(pipeline.get_component("evaluation_0").get_summary())
    data_summary = {
        "train": {
            "guest": guest_train_data["name"],
            "host": host_train_data["name"]
        },
        "test": {
            "guest": guest_train_data["name"],
            "host": host_train_data["name"]
        }
    }
    result_summary = pipeline.get_component("evaluation_0").get_summary()
    return data_summary, result_summary
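# Each pipeline script exposes main(config, param) so the benchmark runner (or a
# developer) can invoke it directly; a direct call might look like this sketch,
# where both paths are assumptions about the local checkout layout:
if __name__ == "__main__":
    data_summary, result_summary = main(
        config="../../config.yaml",  # party ids, backend, work mode
        param="./lr_config.yaml",    # algorithm hyperparameters
    )
    print(data_summary)
    print(result_summary)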
def main(config="../../config.yaml", param="./lr_config.yaml"):
    # obtain config
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)
    assert isinstance(param, dict)
    data_guest = param["data_guest"]
    data_host = param["data_host"]
    data_test = param["data_test"]
    idx = param["idx"]
    label_name = param["label_name"]
    if isinstance(config, str):
        config = JobConfig.load_from_file(config)
        data_base_dir = config["data_base_dir"]
    else:
        data_base_dir = config.data_base_dir

    config_param = {
        "penalty": param["penalty"],
        "max_iter": 100,
        "alpha": param["alpha"],
        "learning_rate": "optimal",
        "eta0": param["learning_rate"]
    }

    # prepare data
    df_guest = pandas.read_csv(os.path.join(data_base_dir, data_guest), index_col=idx)
    df_host = pandas.read_csv(os.path.join(data_base_dir, data_host), index_col=idx)
    # df_test = pandas.read_csv(data_test, index_col=idx)
    df = pandas.concat([df_guest, df_host], axis=0)
    # df = df_guest.join(df_host, rsuffix="host")
    y_train = df[label_name]
    x_train = df.drop(label_name, axis=1)
    # y_test = df_test[label_name]
    # x_test = df_test.drop(label_name, axis=1)
    x_test, y_test = x_train, y_train

    # lm = LogisticRegression(max_iter=20)
    lm = SGDClassifier(loss="log", **config_param)
    lm_fit = lm.fit(x_train, y_train)
    y_pred = lm_fit.predict(x_test)
    y_prob = lm_fit.predict_proba(x_test)[:, 1]
    auc_score = roc_auc_score(y_test, y_prob)
    recall = recall_score(y_test, y_pred, average="macro")
    pr = precision_score(y_test, y_pred, average="macro")
    acc = accuracy_score(y_test, y_pred)
    # y_predict_proba = est.predict_proba(X_test)[:, 1]
    fpr, tpr, thresholds = roc_curve(y_test, y_prob)
    ks = max(tpr - fpr)
    result = {"auc": auc_score}
    print(f"result: {result}")
    print(
        f"coef_: {lm_fit.coef_}, intercept_: {lm_fit.intercept_}, n_iter: {lm_fit.n_iter_}"
    )
    return {}, result
def main(config="../../config.yaml", param="param_conf.yaml", namespace=""):
    num_host = 1
    if isinstance(config, str):
        config = load_job_config(config)
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    epoch = param["epoch"]
    lr = param["lr"]
    batch_size = param.get("batch_size", -1)
    optimizer_name = param.get("optimizer", "Adam")
    encode_label = param.get("encode_label", True)
    loss = param.get("loss", "categorical_crossentropy")
    metrics = param.get("metrics", ["accuracy"])
    layers = param["layers"]

    data = getattr(dataset, param.get("dataset", "vehicle"))
    guest_train_data = data["guest"]
    host_train_data = data["host"][:num_host]
    for d in [guest_train_data, *host_train_data]:
        d["namespace"] = f"{d['namespace']}{namespace}"

    hosts = config.parties.host[:num_host]
    pipeline = PipeLine() \
        .set_initiator(role='guest', party_id=config.parties.guest[0]) \
        .set_roles(guest=config.parties.guest[0], host=hosts, arbiter=config.parties.arbiter)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(
        role='guest',
        party_id=config.parties.guest[0]).algorithm_param(table=guest_train_data)
    for i in range(num_host):
        reader_0.get_party_instance(role='host', party_id=hosts[i]) \
            .algorithm_param(table=host_train_data[i])

    dataio_0 = DataIO(name="dataio_0", with_label=True)
    dataio_0.get_party_instance(role='guest', party_id=config.parties.guest[0]) \
        .algorithm_param(with_label=True, output_format="dense")
    dataio_0.get_party_instance(
        role='host', party_id=hosts).algorithm_param(with_label=True)

    homo_nn_0 = HomoNN(name="homo_nn_0",
                       encode_label=encode_label,
                       max_iter=epoch,
                       batch_size=batch_size,
                       early_stop={
                           "early_stop": "diff",
                           "eps": 0.0
                       })
    for layer_config in layers:
        layer = getattr(tensorflow.keras.layers, layer_config["name"])
        layer_params = layer_config["params"]
        homo_nn_0.add(layer(**layer_params))
    homo_nn_0.compile(optimizer=getattr(optimizers, optimizer_name)(learning_rate=lr),
                      metrics=metrics,
                      loss=loss)

    evaluation_0 = Evaluation(name='evaluation_0',
                              eval_type="multi",
                              metrics=["accuracy", "precision", "recall"])

    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(homo_nn_0, data=Data(train_data=dataio_0.output.data))
    pipeline.add_component(evaluation_0, data=Data(data=homo_nn_0.output.data))
    pipeline.compile()
    pipeline.fit(backend=config.backend, work_mode=config.work_mode)

    metric_summary = pipeline.get_component("evaluation_0").get_summary()
    data_summary = dict(
        train={
            "guest": guest_train_data["name"],
            **{f"host_{i}": host_train_data[i]["name"] for i in range(num_host)}
        },
        test={
            "guest": guest_train_data["name"],
            **{f"host_{i}": host_train_data[i]["name"] for i in range(num_host)}
        })
    return data_summary, metric_summary
def main(config="../../config.yaml", param="./xgb_config_binary.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    backend = config.backend
    work_mode = config.work_mode

    # data sets
    guest_train_data = {
        "name": param['data_guest_train'],
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": param['data_host_train'],
        "namespace": f"experiment{namespace}"
    }
    guest_validate_data = {
        "name": param['data_guest_val'],
        "namespace": f"experiment{namespace}"
    }
    host_validate_data = {
        "name": param['data_host_val'],
        "namespace": f"experiment{namespace}"
    }

    # init pipeline
    pipeline = PipeLine().set_initiator(role="guest", party_id=guest).set_roles(
        guest=guest,
        host=host,
    )

    # set data reader and data-io
    reader_0, reader_1 = Reader(name="reader_0"), Reader(name="reader_1")
    reader_0.get_party_instance(
        role="guest", party_id=guest).algorithm_param(table=guest_train_data)
    reader_0.get_party_instance(
        role="host", party_id=host).algorithm_param(table=host_train_data)
    reader_1.get_party_instance(
        role="guest", party_id=guest).algorithm_param(table=guest_validate_data)
    reader_1.get_party_instance(
        role="host", party_id=host).algorithm_param(table=host_validate_data)

    dataio_0, dataio_1 = DataIO(name="dataio_0"), DataIO(name="dataio_1")
    dataio_0.get_party_instance(role="guest", party_id=guest).algorithm_param(
        with_label=True, output_format="dense")
    dataio_0.get_party_instance(
        role="host", party_id=host).algorithm_param(with_label=False)
    dataio_1.get_party_instance(role="guest", party_id=guest).algorithm_param(
        with_label=True, output_format="dense")
    dataio_1.get_party_instance(
        role="host", party_id=host).algorithm_param(with_label=False)

    # data intersect component
    intersect_0 = Intersection(name="intersection_0")
    intersect_1 = Intersection(name="intersection_1")

    # secure boost component
    hetero_fast_sbt_0 = HeteroFastSecureBoost(
        name="hetero_fast_sbt_0",
        num_trees=param['tree_num'],
        task_type=param['task_type'],
        objective_param={"objective": param['loss_func']},
        encrypt_param={"method": "iterativeAffine"},
        tree_param={"max_depth": param['tree_depth']},
        validation_freqs=1,
        subsample_feature_rate=1,
        learning_rate=param['learning_rate'],
        guest_depth=param['guest_depth'],
        host_depth=param['host_depth'],
        tree_num_per_party=param['tree_num_per_party'],
        work_mode=param['work_mode'])

    # evaluation component
    evaluation_0 = Evaluation(name="evaluation_0", eval_type=param['eval_type'])

    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(dataio_1,
                           data=Data(data=reader_1.output.data),
                           model=Model(dataio_0.output.model))
    pipeline.add_component(intersect_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(intersect_1, data=Data(data=dataio_1.output.data))
    pipeline.add_component(hetero_fast_sbt_0,
                           data=Data(train_data=intersect_0.output.data,
                                     validate_data=intersect_1.output.data))
    pipeline.add_component(evaluation_0,
                           data=Data(data=hetero_fast_sbt_0.output.data))

    pipeline.compile()
    pipeline.fit(backend=backend, work_mode=work_mode)

    return {}, pipeline.get_component("evaluation_0").get_summary()
def main(config="../../config.yaml", param="./xgb_config_binary.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    backend = config.backend
    work_mode = config.work_mode

    # data sets
    guest_train_data = {"name": param['data_guest_train'], "namespace": f"experiment{namespace}"}
    host_train_data = {"name": param['data_host_train'], "namespace": f"experiment{namespace}"}
    guest_validate_data = {"name": param['data_guest_val'], "namespace": f"experiment{namespace}"}
    host_validate_data = {"name": param['data_host_val'], "namespace": f"experiment{namespace}"}

    # init pipeline
    pipeline = PipeLine().set_initiator(role="guest", party_id=guest).set_roles(guest=guest, host=host,)

    # set data reader and data-io
    reader_0, reader_1 = Reader(name="reader_0"), Reader(name="reader_1")
    reader_0.get_party_instance(role="guest", party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role="host", party_id=host).component_param(table=host_train_data)
    reader_1.get_party_instance(role="guest", party_id=guest).component_param(table=guest_validate_data)
    reader_1.get_party_instance(role="host", party_id=host).component_param(table=host_validate_data)

    dataio_0, dataio_1 = DataIO(name="dataio_0"), DataIO(name="dataio_1")
    dataio_0.get_party_instance(role="guest", party_id=guest).component_param(with_label=True, output_format="dense")
    dataio_0.get_party_instance(role="host", party_id=host).component_param(with_label=False)
    dataio_1.get_party_instance(role="guest", party_id=guest).component_param(with_label=True, output_format="dense")
    dataio_1.get_party_instance(role="host", party_id=host).component_param(with_label=False)

    # data intersect component
    intersect_0 = Intersection(name="intersection_0")
    intersect_1 = Intersection(name="intersection_1")

    # secure boost component
    hetero_fast_sbt_0 = HeteroFastSecureBoost(name="hetero_fast_sbt_0",
                                              num_trees=param['tree_num'],
                                              task_type=param['task_type'],
                                              objective_param={"objective": param['loss_func']},
                                              encrypt_param={"method": "iterativeAffine"},
                                              tree_param={"max_depth": param['tree_depth']},
                                              validation_freqs=1,
                                              subsample_feature_rate=1,
                                              learning_rate=param['learning_rate'],
                                              guest_depth=param['guest_depth'],
                                              host_depth=param['host_depth'],
                                              tree_num_per_party=param['tree_num_per_party'],
                                              work_mode=param['work_mode']
                                              )
    hetero_fast_sbt_1 = HeteroFastSecureBoost(name="hetero_fast_sbt_1")

    # evaluation component
    evaluation_0 = Evaluation(name="evaluation_0", eval_type=param['eval_type'])

    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(dataio_1, data=Data(data=reader_1.output.data), model=Model(dataio_0.output.model))
    pipeline.add_component(intersect_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(intersect_1, data=Data(data=dataio_1.output.data))
    pipeline.add_component(hetero_fast_sbt_0, data=Data(train_data=intersect_0.output.data,
                                                        validate_data=intersect_1.output.data))
    pipeline.add_component(hetero_fast_sbt_1, data=Data(test_data=intersect_1.output.data),
                           model=Model(hetero_fast_sbt_0.output.model))
    pipeline.add_component(evaluation_0, data=Data(data=hetero_fast_sbt_0.output.data))

    pipeline.compile()
    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)
pipeline.get_component("hetero_fast_sbt_0").get_output_data().get("data") sbt_1_data = pipeline.get_component("hetero_fast_sbt_1").get_output_data().get("data") sbt_0_score = extract_data(sbt_0_data, "predict_result") sbt_0_label = extract_data(sbt_0_data, "label") sbt_1_score = extract_data(sbt_1_data, "predict_result") sbt_1_label = extract_data(sbt_1_data, "label") sbt_0_score_label = extract_data(sbt_0_data, "predict_result", keep_id=True) sbt_1_score_label = extract_data(sbt_1_data, "predict_result", keep_id=True) metric_summary = parse_summary_result(pipeline.get_component("evaluation_0").get_summary()) if param['eval_type'] == "regression": desc_sbt_0 = regression_metric.Describe().compute(sbt_0_score) desc_sbt_1 = regression_metric.Describe().compute(sbt_1_score) metric_summary["script_metrics"] = {"hetero_fast_sbt_train": desc_sbt_0, "hetero_fast_sbt_validate": desc_sbt_1} elif param['eval_type'] == "binary": metric_sbt = { "score_diversity_ratio": classification_metric.Distribution.compute(sbt_0_score_label, sbt_1_score_label), "ks_2samp": classification_metric.KSTest.compute(sbt_0_score, sbt_1_score), "mAP_D_value": classification_metric.AveragePrecisionScore().compute(sbt_0_score, sbt_1_score, sbt_0_label, sbt_1_label)} metric_summary["distribution_metrics"] = {"hetero_fast_sbt": metric_sbt} elif param['eval_type'] == "multi": metric_sbt = { "score_diversity_ratio": classification_metric.Distribution.compute(sbt_0_score_label, sbt_1_score_label)} metric_summary["distribution_metrics"] = {"hetero_fast_sbt": metric_sbt} data_summary = {"train": {"guest": guest_train_data["name"], "host": host_train_data["name"]}, "test": {"guest": guest_train_data["name"], "host": host_train_data["name"]} } return data_summary, metric_summary
def main(config="../../config.yaml", param='./xgb_config_binary.yaml', namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    guest_train_data = {
        "name": param['data_guest_train'],
        "namespace": f"experiment{namespace}"
    }
    guest_validate_data = {
        "name": param['data_guest_val'],
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": param['data_host_train'],
        "namespace": f"experiment{namespace}"
    }
    host_validate_data = {
        "name": param['data_host_val'],
        "namespace": f"experiment{namespace}"
    }

    pipeline = PipeLine().set_initiator(
        role='guest', party_id=guest).set_roles(guest=guest, host=host, arbiter=arbiter)

    dataio_0, dataio_1 = DataIO(name="dataio_0"), DataIO(name='dataio_1')
    reader_0, reader_1 = Reader(name="reader_0"), Reader(name='reader_1')

    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)
    dataio_0.get_party_instance(role='guest', party_id=guest).component_param(
        with_label=True, output_format="dense")
    dataio_0.get_party_instance(role='host', party_id=host).component_param(
        with_label=True, output_format="dense")

    reader_1.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_validate_data)
    reader_1.get_party_instance(
        role='host', party_id=host).component_param(table=host_validate_data)
    dataio_1.get_party_instance(role='guest', party_id=guest).component_param(
        with_label=True, output_format="dense")
    dataio_1.get_party_instance(role='host', party_id=host).component_param(
        with_label=True, output_format="dense")

    homo_secureboost_0 = HomoSecureBoost(
        name="homo_secureboost_0",
        num_trees=param['tree_num'],
        task_type=param['task_type'],
        objective_param={"objective": param['loss_func']},
        tree_param={"max_depth": param['tree_depth']},
        validation_freqs=1,
        subsample_feature_rate=1,
        learning_rate=param['learning_rate'],
        bin_num=50)
    homo_secureboost_1 = HomoSecureBoost(name="homo_secureboost_1")

    evaluation_0 = Evaluation(name='evaluation_0', eval_type=param['eval_type'])

    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(reader_1)
    pipeline.add_component(dataio_1,
                           data=Data(data=reader_1.output.data),
                           model=Model(dataio_0.output.model))
    pipeline.add_component(homo_secureboost_0,
                           data=Data(train_data=dataio_0.output.data,
                                     validate_data=dataio_1.output.data))
    pipeline.add_component(homo_secureboost_1,
                           data=Data(test_data=dataio_1.output.data),
                           model=Model(homo_secureboost_0.output.model))
    pipeline.add_component(evaluation_0, data=Data(homo_secureboost_0.output.data))

    pipeline.compile()
    pipeline.fit()

    sbt_0_data = pipeline.get_component(
        "homo_secureboost_0").get_output_data().get("data")
    sbt_1_data = pipeline.get_component(
        "homo_secureboost_1").get_output_data().get("data")
    sbt_0_score = extract_data(sbt_0_data, "predict_result")
    sbt_0_label = extract_data(sbt_0_data, "label")
    sbt_1_score = extract_data(sbt_1_data, "predict_result")
    sbt_1_label = extract_data(sbt_1_data, "label")
    sbt_0_score_label = extract_data(sbt_0_data, "predict_result", keep_id=True)
    sbt_1_score_label = extract_data(sbt_1_data, "predict_result", keep_id=True)

    metric_summary = parse_summary_result(
        pipeline.get_component("evaluation_0").get_summary())
== "regression": desc_sbt_0 = regression_metric.Describe().compute(sbt_0_score) desc_sbt_1 = regression_metric.Describe().compute(sbt_1_score) metric_summary["script_metrics"] = { "sbt_train": desc_sbt_0, "sbt_validate": desc_sbt_1 } elif param['eval_type'] == "binary": metric_sbt = { "score_diversity_ratio": classification_metric.Distribution.compute(sbt_0_score_label, sbt_1_score_label), "ks_2samp": classification_metric.KSTest.compute(sbt_0_score, sbt_1_score), "mAP_D_value": classification_metric.AveragePrecisionScore().compute( sbt_0_score, sbt_1_score, sbt_0_label, sbt_1_label) } metric_summary["distribution_metrics"] = {"homo_sbt": metric_sbt} elif param['eval_type'] == "multi": metric_sbt = { "score_diversity_ratio": classification_metric.Distribution.compute(sbt_0_score_label, sbt_1_score_label) } metric_summary["distribution_metrics"] = {"homo_sbt": metric_sbt} data_summary = { "train": { "guest": guest_train_data["name"], "host": host_train_data["name"] }, "test": { "guest": guest_validate_data["name"], "host": host_validate_data["name"] } } return data_summary, metric_summary
def main(config="../../config.yaml", param="./linr_config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]
    backend = config.backend
    work_mode = config.work_mode
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)
    """
    guest = 9999
    host = 10000
    arbiter = 9999
    backend = 0
    work_mode = 1
    param = {"penalty": "L2", "max_iter": 5}
    """

    guest_train_data = {
        "name": "motor_hetero_guest",
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": "motor_hetero_host",
        "namespace": f"experiment{namespace}"
    }

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    # configure Reader for guest
    reader_0.get_party_instance(
        role='guest', party_id=guest).algorithm_param(table=guest_train_data)
    # configure Reader for host
    reader_0.get_party_instance(
        role='host', party_id=host).algorithm_param(table=host_train_data)

    # define DataIO components
    dataio_0 = DataIO(name="dataio_0")  # start component numbering at 0
    # get DataIO party instance of guest
    dataio_0_guest_party_instance = dataio_0.get_party_instance(role='guest', party_id=guest)
    # configure DataIO for guest
    dataio_0_guest_party_instance.algorithm_param(
        with_label=True,
        output_format="dense",
        label_name=param["label_name"],
        label_type="float")
    # get and configure DataIO party instance of host
    dataio_0.get_party_instance(
        role='host', party_id=host).algorithm_param(with_label=False)

    # define Intersection component
    intersection_0 = Intersection(name="intersection_0")

    param = {
        "penalty": param["penalty"],
        "validation_freqs": None,
        "early_stopping_rounds": None,
        "max_iter": param["max_iter"],
        "optimizer": param["optimizer"],
        "learning_rate": param["learning_rate"],
        "init_param": param["init_param"],
        "batch_size": param["batch_size"],
        "alpha": param["alpha"]
    }

    hetero_linr_0 = HeteroLinR(name='hetero_linr_0', **param)
    evaluation_0 = Evaluation(name='evaluation_0',
                              eval_type="regression",
                              metrics=[
                                  "r2_score",
                                  "mean_squared_error",
                                  "root_mean_squared_error",
                                  "explained_variance"
                              ])

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(hetero_linr_0, data=Data(train_data=intersection_0.output.data))
    pipeline.add_component(evaluation_0, data=Data(data=hetero_linr_0.output.data))

    # compile pipeline once finished adding modules, this step will form conf and dsl files for running job
    pipeline.compile()

    # fit model
    pipeline.fit(backend=backend, work_mode=work_mode)

    metric_summary = pipeline.get_component("evaluation_0").get_summary()
    data_summary = {
        "train": {
            "guest": guest_train_data["name"],
            "host": host_train_data["name"]
        },
        "test": {
            "guest": guest_train_data["name"],
            "host": host_train_data["name"]
        }
    }
    return data_summary, metric_summary
def main(config="../../config.yaml", param="param_conf.yaml", namespace=""):
    num_host = 1
    if isinstance(config, str):
        config = load_job_config(config)
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    epoch = param["epoch"]
    lr = param["lr"]
    batch_size = param.get("batch_size", -1)
    optimizer_name = param.get("optimizer", "Adam")
    encode_label = param.get("encode_label", True)
    loss = param.get("loss", "categorical_crossentropy")
    metrics = param.get("metrics", ["accuracy"])
    layers = param["layers"]

    data = getattr(dataset, param.get("dataset", "vehicle"))
    guest_train_data = data["guest"]
    host_train_data = data["host"][:num_host]
    for d in [guest_train_data, *host_train_data]:
        d["namespace"] = f"{d['namespace']}{namespace}"

    hosts = config.parties.host[:num_host]
    pipeline = PipeLine() \
        .set_initiator(role='guest', party_id=config.parties.guest[0]) \
        .set_roles(guest=config.parties.guest[0], host=hosts, arbiter=config.parties.arbiter)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=config.parties.guest[0]).component_param(table=guest_train_data)
    for i in range(num_host):
        reader_0.get_party_instance(role='host', party_id=hosts[i]) \
            .component_param(table=host_train_data[i])

    dataio_0 = DataIO(name="dataio_0", with_label=True)
    dataio_0.get_party_instance(role='guest', party_id=config.parties.guest[0]) \
        .component_param(with_label=True, output_format="dense")
    dataio_0.get_party_instance(role='host', party_id=hosts).component_param(with_label=True)

    homo_nn_0 = HomoNN(name="homo_nn_0",
                       encode_label=encode_label,
                       max_iter=epoch,
                       batch_size=batch_size,
                       early_stop={"early_stop": "diff", "eps": 0.0})
    for layer_config in layers:
        layer = getattr(tensorflow.keras.layers, layer_config["name"])
        layer_params = layer_config["params"]
        homo_nn_0.add(layer(**layer_params))
    homo_nn_0.compile(optimizer=getattr(optimizers, optimizer_name)(learning_rate=lr),
                      metrics=metrics,
                      loss=loss)
    homo_nn_1 = HomoNN(name="homo_nn_1")

    if param["loss"] == "categorical_crossentropy":
        eval_type = "multi"
    else:
        eval_type = "binary"
    evaluation_0 = Evaluation(name='evaluation_0',
                              eval_type="multi",
                              metrics=["accuracy", "precision", "recall"])

    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(homo_nn_0, data=Data(train_data=dataio_0.output.data))
    pipeline.add_component(homo_nn_1,
                           data=Data(test_data=dataio_0.output.data),
                           model=Model(homo_nn_0.output.model))
    pipeline.add_component(evaluation_0, data=Data(data=homo_nn_0.output.data))
    pipeline.compile()

    job_parameters = JobParameters(backend=config.backend, work_mode=config.work_mode)
    pipeline.fit(job_parameters)

    metric_summary = parse_summary_result(pipeline.get_component("evaluation_0").get_summary())
    nn_0_data = pipeline.get_component("homo_nn_0").get_output_data().get("data")
    nn_1_data = pipeline.get_component("homo_nn_1").get_output_data().get("data")
    nn_0_score = extract_data(nn_0_data, "predict_result")
    nn_0_label = extract_data(nn_0_data, "label")
    nn_1_score = extract_data(nn_1_data, "predict_result")
    nn_1_label = extract_data(nn_1_data, "label")
    nn_0_score_label = extract_data(nn_0_data, "predict_result", keep_id=True)
    nn_1_score_label = extract_data(nn_1_data, "predict_result", keep_id=True)

    if eval_type == "binary":
        metric_nn = {
            "score_diversity_ratio": classification_metric.Distribution.compute(nn_0_score_label, nn_1_score_label),
            "ks_2samp": classification_metric.KSTest.compute(nn_0_score, nn_1_score),
            "mAP_D_value": classification_metric.AveragePrecisionScore().compute(
                nn_0_score, nn_1_score, nn_0_label, nn_1_label)}
        metric_summary["distribution_metrics"] = {"homo_nn": metric_nn}
    elif eval_type == "multi":
        metric_nn = {
            "score_diversity_ratio": classification_metric.Distribution.compute(nn_0_score_label, nn_1_score_label)}
        metric_summary["distribution_metrics"] = {"homo_nn": metric_nn}

    data_summary = dict(
        train={"guest": guest_train_data["name"],
               **{f"host_{i}": host_train_data[i]["name"] for i in range(num_host)}},
        test={"guest": guest_train_data["name"],
              **{f"host_{i}": host_train_data[i]["name"] for i in range(num_host)}}
    )
    return data_summary, metric_summary
def main(config="../../config.yaml", param="./hetero_nn_breast_config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    backend = config.backend
    work_mode = config.work_mode

    guest_train_data = {
        "name": param["guest_table_name"],
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": param["host_table_name"],
        "namespace": f"experiment{namespace}"
    }

    pipeline = PipeLine().set_initiator(role='guest', party_id=guest).set_roles(guest=guest, host=host)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)

    dataio_0 = DataIO(name="dataio_0")
    dataio_0.get_party_instance(
        role='guest', party_id=guest).component_param(with_label=True)
    dataio_0.get_party_instance(
        role='host', party_id=host).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")

    hetero_nn_0 = HeteroNN(name="hetero_nn_0",
                           epochs=param["epochs"],
                           interactive_layer_lr=param["learning_rate"],
                           batch_size=param["batch_size"],
                           early_stop="diff")
    hetero_nn_0.add_bottom_model(
        Dense(units=param["bottom_layer_units"],
              input_shape=(10,),
              activation="tanh",
              kernel_initializer=initializers.RandomUniform(minval=-1, maxval=1, seed=123)))
    hetero_nn_0.set_interactve_layer(
        Dense(units=param["interactive_layer_units"],
              input_shape=(param["bottom_layer_units"],),
              activation="relu",
              kernel_initializer=initializers.RandomUniform(minval=-1, maxval=1, seed=123)))
    hetero_nn_0.add_top_model(
        Dense(units=param["top_layer_units"],
              input_shape=(param["interactive_layer_units"],),
              activation=param["top_act"],
              kernel_initializer=initializers.RandomUniform(minval=-1, maxval=1, seed=123)))
    opt = getattr(optimizers, param["opt"])(lr=param["learning_rate"])
    hetero_nn_0.compile(optimizer=opt, metrics=param["metrics"], loss=param["loss"])

    if param["loss"] == "categorical_crossentropy":
        eval_type = "multi"
    else:
        eval_type = "binary"
    evaluation_0 = Evaluation(name="evaluation_0", eval_type=eval_type)

    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(hetero_nn_0, data=Data(train_data=intersection_0.output.data))
    pipeline.add_component(evaluation_0, data=Data(data=hetero_nn_0.output.data))
    pipeline.compile()

    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)

    data_summary = {
        "train": {
            "guest": guest_train_data["name"],
            "host": host_train_data["name"]
        },
        "test": {
            "guest": guest_train_data["name"],
            "host": host_train_data["name"]
        }
    }
    return data_summary, pipeline.get_component("evaluation_0").get_summary()
def main(config="../../config.yaml", param='./xgb_config_binary.yaml', namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]
    backend = config.backend
    work_mode = config.work_mode

    guest_train_data = {
        "name": param['data_guest_train'],
        "namespace": f"experiment{namespace}"
    }
    guest_validate_data = {
        "name": param['data_guest_val'],
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": param['data_host_train'],
        "namespace": f"experiment{namespace}"
    }
    host_validate_data = {
        "name": param['data_host_val'],
        "namespace": f"experiment{namespace}"
    }

    pipeline = PipeLine().set_initiator(
        role='guest', party_id=guest).set_roles(guest=guest, host=host, arbiter=arbiter)

    dataio_0, dataio_1 = DataIO(name="dataio_0"), DataIO(name='dataio_1')
    reader_0, reader_1 = Reader(name="reader_0"), Reader(name='reader_1')

    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)
    dataio_0.get_party_instance(role='guest', party_id=guest).component_param(
        with_label=True, output_format="dense")
    dataio_0.get_party_instance(role='host', party_id=host).component_param(
        with_label=True, output_format="dense")

    reader_1.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_validate_data)
    reader_1.get_party_instance(
        role='host', party_id=host).component_param(table=host_validate_data)
    dataio_1.get_party_instance(role='guest', party_id=guest).component_param(
        with_label=True, output_format="dense")
    dataio_1.get_party_instance(role='host', party_id=host).component_param(
        with_label=True, output_format="dense")

    homo_secureboost_0 = HomoSecureBoost(
        name="homo_secureboost_0",
        num_trees=param['tree_num'],
        task_type=param['task_type'],
        objective_param={"objective": param['loss_func']},
        tree_param={"max_depth": param['tree_depth']},
        validation_freqs=1,
        subsample_feature_rate=1,
        learning_rate=param['learning_rate'],
        bin_num=50)

    evaluation_0 = Evaluation(name='evaluation_0', eval_type=param['eval_type'])

    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(reader_1)
    pipeline.add_component(dataio_1,
                           data=Data(data=reader_1.output.data),
                           model=Model(dataio_0.output.model))
    pipeline.add_component(homo_secureboost_0,
                           data=Data(train_data=dataio_0.output.data,
                                     validate_data=dataio_1.output.data))
    pipeline.add_component(evaluation_0, data=Data(homo_secureboost_0.output.data))

    pipeline.compile()
    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)

    data_summary = {
        "train": {
            "guest": guest_train_data["name"],
            "host": host_train_data["name"]
        },
        "test": {
            "guest": guest_train_data["name"],
            "host": host_train_data["name"]
        }
    }
    return data_summary, pipeline.get_component('evaluation_0').get_summary()
def main(config="../../config.yaml", param="./lr_config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    if isinstance(param, str):
        param = JobConfig.load_from_file(param)
    assert isinstance(param, dict)

    data_set = param.get("data_guest").split('/')[-1]
    if data_set == "default_credit_hetero_guest.csv":
        guest_data_table = 'default_credit_hetero_guest'
        host_data_table = 'default_credit_hetero_host'
    elif data_set == 'breast_hetero_guest.csv':
        guest_data_table = 'breast_hetero_guest'
        host_data_table = 'breast_hetero_host'
    elif data_set == 'give_credit_hetero_guest.csv':
        guest_data_table = 'give_credit_hetero_guest'
        host_data_table = 'give_credit_hetero_host'
    elif data_set == 'epsilon_5k_hetero_guest.csv':
        guest_data_table = 'epsilon_5k_hetero_guest'
        host_data_table = 'epsilon_5k_hetero_host'
    else:
        raise ValueError(f"Cannot recognize data_set: {data_set}")

    guest_train_data = {"name": guest_data_table, "namespace": f"experiment{namespace}"}
    host_train_data = {"name": host_data_table, "namespace": f"experiment{namespace}"}

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    # configure Reader for guest
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    # configure Reader for host
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    # define DataTransform components
    data_transform_0 = DataTransform(name="data_transform_0")  # start component numbering at 0
    # get DataTransform party instance of guest
    data_transform_0_guest_party_instance = data_transform_0.get_party_instance(role='guest', party_id=guest)
    # configure DataTransform for guest
    data_transform_0_guest_party_instance.component_param(with_label=True, output_format="dense")
    # get and configure DataTransform party instance of host
    data_transform_0.get_party_instance(role='host', party_id=host).component_param(with_label=False)

    # define Intersection component
    intersection_0 = Intersection(name="intersection_0")

    lr_param = {
    }
    config_param = {
        "penalty": param["penalty"],
        "max_iter": param["max_iter"],
        "alpha": param["alpha"],
        "learning_rate": param["learning_rate"],
        "optimizer": param["optimizer"],
        "batch_size": param["batch_size"],
        "shuffle": False,
        "masked_rate": 0,
        "early_stop": "diff",
        "tol": 1e-5,
        "floating_point_precision": param.get("floating_point_precision"),
        "init_param": {
            "init_method": param.get("init_method", 'random_uniform'),
            "random_seed": param.get("random_seed", 103)
        }
    }
    lr_param.update(config_param)
    print(f"lr_param: {lr_param}, data_set: {data_set}")

    hetero_lr_0 = HeteroLR(name='hetero_lr_0', **lr_param)
    hetero_lr_1 = HeteroLR(name='hetero_lr_1')
    evaluation_0 = Evaluation(name='evaluation_0', eval_type="binary")

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(hetero_lr_0, data=Data(train_data=intersection_0.output.data))
    pipeline.add_component(hetero_lr_1,
                           data=Data(test_data=intersection_0.output.data),
                           model=Model(hetero_lr_0.output.model))
    pipeline.add_component(evaluation_0, data=Data(data=hetero_lr_0.output.data))

    # compile pipeline once finished adding modules, this step will form conf and dsl files for running job
    pipeline.compile()

    # fit model
    job_parameters = JobParameters()
    pipeline.fit(job_parameters)

    lr_0_data = pipeline.get_component("hetero_lr_0").get_output_data().get("data")
    lr_1_data = pipeline.get_component("hetero_lr_1").get_output_data().get("data")
    lr_0_score = extract_data(lr_0_data, "predict_result")
    lr_0_label = extract_data(lr_0_data, "label")
    lr_1_score = extract_data(lr_1_data, "predict_result")
    lr_1_label = extract_data(lr_1_data, "label")
    lr_0_score_label = extract_data(lr_0_data, "predict_result", keep_id=True)
    lr_1_score_label = extract_data(lr_1_data, "predict_result", keep_id=True)

    result_summary = parse_summary_result(pipeline.get_component("evaluation_0").get_summary())
    metric_lr = {
        "score_diversity_ratio": classification_metric.Distribution.compute(lr_0_score_label, lr_1_score_label),
        "ks_2samp": classification_metric.KSTest.compute(lr_0_score, lr_1_score),
        "mAP_D_value": classification_metric.AveragePrecisionScore().compute(lr_0_score, lr_1_score,
                                                                             lr_0_label, lr_1_label)}
    result_summary["distribution_metrics"] = {"hetero_lr": metric_lr}

    data_summary = {"train": {"guest": guest_train_data["name"], "host": host_train_data["name"]},
                    "test": {"guest": guest_train_data["name"], "host": host_train_data["name"]}
                    }
    return data_summary, result_summary
def main(config="../../config.yaml", param="./sshe_linr_config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    guest_train_data = {
        "name": "motor_hetero_guest",
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": "motor_hetero_host",
        "namespace": f"experiment{namespace}"
    }

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    # configure Reader for guest
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    # configure Reader for host
    reader_0.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)

    # define DataTransform components
    data_transform_0 = DataTransform(
        name="data_transform_0")  # start component numbering at 0
    # get DataTransform party instance of guest
    data_transform_0_guest_party_instance = data_transform_0.get_party_instance(
        role='guest', party_id=guest)
    # configure DataTransform for guest
    data_transform_0_guest_party_instance.component_param(
        with_label=True,
        output_format="dense",
        label_name=param["label_name"],
        label_type="float")
    # get and configure DataTransform party instance of host
    data_transform_0.get_party_instance(
        role='host', party_id=host).component_param(with_label=False)

    # define Intersection component
    intersection_0 = Intersection(name="intersection_0")

    param = {
        "penalty": param["penalty"],
        "max_iter": param["max_iter"],
        "optimizer": param["optimizer"],
        "learning_rate": param["learning_rate"],
        "init_param": param["init_param"],
        "batch_size": param["batch_size"],
        "alpha": param["alpha"],
        "early_stop": param["early_stop"],
        "reveal_strategy": param["reveal_strategy"],
        "tol": 1e-6,
        "reveal_every_iter": True
    }

    hetero_sshe_linr_0 = HeteroSSHELinR(name='hetero_sshe_linr_0', **param)
    hetero_sshe_linr_1 = HeteroSSHELinR(name='hetero_sshe_linr_1')
    evaluation_0 = Evaluation(name='evaluation_0',
                              eval_type="regression",
                              metrics=[
                                  "r2_score",
                                  "mean_squared_error",
                                  "root_mean_squared_error",
                                  "explained_variance"
                              ])

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(hetero_sshe_linr_0, data=Data(train_data=intersection_0.output.data))
    pipeline.add_component(hetero_sshe_linr_1,
                           data=Data(test_data=intersection_0.output.data),
                           model=Model(hetero_sshe_linr_0.output.model))
    pipeline.add_component(evaluation_0, data=Data(data=hetero_sshe_linr_0.output.data))

    # compile pipeline once finished adding modules, this step will form conf and dsl files for running job
    pipeline.compile()

    # fit model
    pipeline.fit()

    metric_summary = parse_summary_result(
        pipeline.get_component("evaluation_0").get_summary())
    data_linr_0 = extract_data(
        pipeline.get_component("hetero_sshe_linr_0").get_output_data().get("data"),
        "predict_result")
    data_linr_1 = extract_data(
        pipeline.get_component("hetero_sshe_linr_1").get_output_data().get("data"),
        "predict_result")
    desc_linr_0 = regression_metric.Describe().compute(data_linr_0)
regression_metric.Describe().compute(data_linr_1) metric_summary["script_metrics"] = { "linr_train": desc_linr_0, "linr_validate": desc_linr_1 } data_summary = { "train": { "guest": guest_train_data["name"], "host": host_train_data["name"] }, "test": { "guest": guest_train_data["name"], "host": host_train_data["name"] } } return data_summary, metric_summary
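
# For context, these are the keys the script above looks up in
# sshe_linr_config.yaml, collected as a Python dict. The values shown are
# illustrative assumptions, not the shipped config file.
_sshe_linr_param_example = {
    "label_name": "motor_speed",          # regression target column (assumed)
    "penalty": "L2",
    "max_iter": 30,
    "optimizer": "sgd",
    "learning_rate": 0.05,
    "init_param": {"init_method": "zeros"},
    "batch_size": -1,                     # -1: full batch (assumed convention)
    "alpha": 0.01,
    "early_stop": "diff",
    "reveal_strategy": "respectively",
}
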
def main(config="../../config.yaml", param="./vechile_config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]
    backend = config.backend
    work_mode = config.work_mode

    if isinstance(param, str):
        param = JobConfig.load_from_file(param)
    assert isinstance(param, dict)

    """
    sample values:
    guest = 9999
    host = 10000
    arbiter = 9999
    backend = 0
    work_mode = 1
    param = {"penalty": "L2", "max_iter": 5}
    """

    data_set = param.get("data_guest").split('/')[-1]
    if data_set == "vehicle_scale_hetero_guest.csv":
        guest_data_table = 'vehicle_scale_hetero_guest'
        host_data_table = 'vehicle_scale_hetero_host'
    else:
        raise ValueError(f"Cannot recognize data_set: {data_set}")

    guest_train_data = {"name": guest_data_table, "namespace": f"experiment{namespace}"}
    host_train_data = {"name": host_data_table, "namespace": f"experiment{namespace}"}

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    # configure Reader for guest
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    # configure Reader for host
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    # define DataIO components
    dataio_0 = DataIO(name="dataio_0")  # start component numbering at 0

    # get DataIO party instance of guest
    dataio_0_guest_party_instance = dataio_0.get_party_instance(role='guest', party_id=guest)
    # configure DataIO for guest
    dataio_0_guest_party_instance.component_param(with_label=True, output_format="dense")
    # get and configure DataIO party instance of host
    dataio_0.get_party_instance(role='host', party_id=host).component_param(with_label=False)

    # define Intersection component
    intersection_0 = Intersection(name="intersection_0")

    lr_param = {
        "validation_freqs": None,
        "early_stopping_rounds": None,
    }

    config_param = {
        "penalty": param["penalty"],
        "max_iter": param["max_iter"],
        "alpha": param["alpha"],
        "learning_rate": param["learning_rate"],
        "optimizer": param["optimizer"],
        "batch_size": param["batch_size"],
        "early_stop": "diff",
        "init_param": {
            "init_method": param.get("init_method", 'random_uniform'),
            "random_seed": param.get("random_seed", 103)
        }
    }
    lr_param.update(config_param)
    print(f"lr_param: {lr_param}, data_set: {data_set}")

    hetero_lr_0 = HeteroLR(name='hetero_lr_0', **lr_param)
    hetero_lr_1 = HeteroLR(name='hetero_lr_1')

    evaluation_0 = Evaluation(name='evaluation_0', eval_type="multi")

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(hetero_lr_0, data=Data(train_data=intersection_0.output.data))
    pipeline.add_component(hetero_lr_1, data=Data(test_data=intersection_0.output.data),
                           model=Model(hetero_lr_0.output.model))
    pipeline.add_component(evaluation_0, data=Data(data=hetero_lr_0.output.data))

    # compile pipeline once finished adding modules, this step will form conf and dsl files for running job
    pipeline.compile()

    # fit model
    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)

    # query component summary
    result_summary = parse_summary_result(pipeline.get_component("evaluation_0").get_summary())
    lr_0_data = pipeline.get_component("hetero_lr_0").get_output_data().get("data")
    lr_1_data = pipeline.get_component("hetero_lr_1").get_output_data().get("data")
    lr_0_score_label = extract_data(lr_0_data, "predict_result", keep_id=True)
    lr_1_score_label = extract_data(lr_1_data, "predict_result", keep_id=True)
    metric_lr = {
        "score_diversity_ratio": classification_metric.Distribution.compute(lr_0_score_label, lr_1_score_label)
    }
    result_summary["distribution_metrics"] = {"hetero_lr": metric_lr}

    data_summary = {"train": {"guest": guest_train_data["name"], "host": host_train_data["name"]},
                    "test": {"guest": guest_train_data["name"], "host": host_train_data["name"]}
                    }
    return data_summary, result_summary
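
# Illustration only: the "score_diversity_ratio" above compares predictions from
# hetero_lr_0 (train component) and hetero_lr_1 (validate component) sample by
# sample, aligned on id. A toy version of that idea -- one plausible reading,
# not the actual fate_test classification_metric code -- could be:
def _score_diversity_sketch(a, b):
    # a, b: iterables of (id, predict_result) rows, as returned with keep_id=True
    lookup = {row[0]: row[1] for row in b}
    matched = [(row[1], lookup[row[0]]) for row in a if row[0] in lookup]
    same = sum(1 for x, y in matched if x == y)
    # fraction of intersected ids that received the same prediction
    return same / len(matched) if matched else 0.0
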
def main(config="../../config.yaml", param="./hetero_nn_breast_config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]

    guest_train_data = {"name": param["guest_table_name"], "namespace": f"experiment{namespace}"}
    host_train_data = {"name": param["host_table_name"], "namespace": f"experiment{namespace}"}

    pipeline = PipeLine().set_initiator(role='guest', party_id=guest).set_roles(guest=guest, host=host)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    dataio_0 = DataIO(name="dataio_0")
    dataio_0.get_party_instance(role='guest', party_id=guest).component_param(with_label=True)
    dataio_0.get_party_instance(role='host', party_id=host).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")

    hetero_nn_0 = HeteroNN(name="hetero_nn_0", epochs=param["epochs"],
                           interactive_layer_lr=param["learning_rate"],
                           batch_size=param["batch_size"], early_stop="diff")
    hetero_nn_0.add_bottom_model(
        Dense(units=param["bottom_layer_units"], input_shape=(10,), activation="tanh",
              kernel_initializer=initializers.RandomUniform(minval=-1, maxval=1, seed=123)))
    # note: "interactve" spelling below follows the pipeline API method name
    hetero_nn_0.set_interactve_layer(
        Dense(units=param["interactive_layer_units"], input_shape=(param["bottom_layer_units"],),
              activation="relu",
              kernel_initializer=initializers.RandomUniform(minval=-1, maxval=1, seed=123)))
    hetero_nn_0.add_top_model(
        Dense(units=param["top_layer_units"], input_shape=(param["interactive_layer_units"],),
              activation=param["top_act"],
              kernel_initializer=initializers.RandomUniform(minval=-1, maxval=1, seed=123)))
    opt = getattr(optimizers, param["opt"])(lr=param["learning_rate"])
    hetero_nn_0.compile(optimizer=opt, metrics=param["metrics"], loss=param["loss"])

    hetero_nn_1 = HeteroNN(name="hetero_nn_1")

    if param["loss"] == "categorical_crossentropy":
        eval_type = "multi"
    else:
        eval_type = "binary"
    evaluation_0 = Evaluation(name="evaluation_0", eval_type=eval_type)

    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(hetero_nn_0, data=Data(train_data=intersection_0.output.data))
    pipeline.add_component(hetero_nn_1, data=Data(test_data=intersection_0.output.data),
                           model=Model(hetero_nn_0.output.model))
    pipeline.add_component(evaluation_0, data=Data(data=hetero_nn_0.output.data))
    pipeline.compile()
    pipeline.fit()

    nn_0_data = pipeline.get_component("hetero_nn_0").get_output_data().get("data")
    nn_1_data = pipeline.get_component("hetero_nn_1").get_output_data().get("data")
    nn_0_score = extract_data(nn_0_data, "predict_result")
    nn_0_label = extract_data(nn_0_data, "label")
    nn_1_score = extract_data(nn_1_data, "predict_result")
    nn_1_label = extract_data(nn_1_data, "label")
    nn_0_score_label = extract_data(nn_0_data, "predict_result", keep_id=True)
    nn_1_score_label = extract_data(nn_1_data, "predict_result", keep_id=True)

    metric_summary = parse_summary_result(pipeline.get_component("evaluation_0").get_summary())
    if eval_type == "binary":
        metric_nn = {
            "score_diversity_ratio": classification_metric.Distribution.compute(nn_0_score_label, nn_1_score_label),
            "ks_2samp": classification_metric.KSTest.compute(nn_0_score, nn_1_score),
            "mAP_D_value": classification_metric.AveragePrecisionScore().compute(nn_0_score, nn_1_score,
                                                                                 nn_0_label, nn_1_label)}
        metric_summary["distribution_metrics"] = {"hetero_nn": metric_nn}
    elif eval_type == "multi":
        metric_nn = {
            "score_diversity_ratio": classification_metric.Distribution.compute(nn_0_score_label, nn_1_score_label)}
        metric_summary["distribution_metrics"] = {"hetero_nn": metric_nn}

    data_summary = {"train": {"guest": guest_train_data["name"], "host": host_train_data["name"]},
                    "test": {"guest": guest_train_data["name"], "host": host_train_data["name"]}
                    }
    return data_summary, metric_summary
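
# A typical entry point for scripts of this shape -- a sketch only. In
# practice fate_test supplies config/param when running benchmark-quality
# suites, so this block is illustrative rather than part of the original file.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser("BENCHMARK-QUALITY PIPELINE JOB")
    parser.add_argument("-c", "--config", type=str,
                        help="path to job config file", default="../../config.yaml")
    parser.add_argument("-p", "--param", type=str,
                        help="path to parameter file", default="./hetero_nn_breast_config.yaml")
    args = parser.parse_args()
    main(args.config, args.param)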