def main(config="../../config.yaml", namespace=""): # obtain config if isinstance(config, str): config = load_job_config(config) parties = config.parties guest = parties.guest[0] host = parties.host[0] arbiter = parties.arbiter[0] guest_train_data = {"name": "student_homo_guest", "namespace": f"experiment{namespace}"} guest_validate_data = {"name": "student_homo_test", "namespace": f"experiment{namespace}"} host_train_data = {"name": "student_homo_host", "namespace": f"experiment{namespace}"} host_validate_data = {"name": "student_homo_test", "namespace": f"experiment{namespace}"} pipeline = PipeLine().set_initiator(role='guest', party_id=guest).set_roles(guest=guest, host=host, arbiter=arbiter) data_transform_0, data_transform_1 = DataTransform(name="data_transform_0"), DataTransform(name='data_transform_1') reader_0, reader_1 = Reader(name="reader_0"), Reader(name='reader_1') reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data) reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data) data_transform_0.get_party_instance(role='guest', party_id=guest).component_param(with_label=True, output_format="dense", label_type="float") data_transform_0.get_party_instance(role='host', party_id=host).component_param(with_label=True, output_format="dense", label_type="float") reader_1.get_party_instance(role='guest', party_id=guest).component_param(table=guest_validate_data) reader_1.get_party_instance(role='host', party_id=host).component_param(table=host_validate_data) data_transform_1.get_party_instance(role='guest', party_id=guest).component_param(with_label=True, output_format="dense", label_type="float") data_transform_1.get_party_instance(role='host', party_id=host).component_param(with_label=True, output_format="dense", label_type="float") homo_secureboost_0 = HomoSecureBoost(name="homo_secureboost_0", num_trees=3, task_type='regression', objective_param={"objective": "lse"}, tree_param={ "max_depth": 3 }, validation_freqs=1 ) evaluation_0 = Evaluation(name='evaluation_0', eval_type='regression') pipeline.add_component(reader_0) pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data)) pipeline.add_component(reader_1) pipeline.add_component(data_transform_1, data=Data(data=reader_1.output.data), model=Model(data_transform_0.output.model)) pipeline.add_component(homo_secureboost_0, data=Data(train_data=data_transform_0.output.data, validate_data=data_transform_1.output.data )) pipeline.add_component(evaluation_0, data=Data(homo_secureboost_0.output.data)) pipeline.compile() pipeline.fit()
def main(config="../../config.yaml", namespace=""): # obtain config if isinstance(config, str): config = load_job_config(config) parties = config.parties guest = parties.guest[0] host = parties.host[0] arbiter = parties.arbiter[0] backend = config.backend work_mode = config.work_mode guest_train_data = { "name": "vehicle_scale_homo_guest", "namespace": f"experiment{namespace}" } host_train_data = { "name": "vehicle_scale_homo_host", "namespace": f"experiment{namespace}" } pipeline = PipeLine().set_initiator( role='guest', party_id=guest).set_roles(guest=guest, host=host, arbiter=arbiter) dataio_0 = DataIO(name="dataio_0") reader_0 = Reader(name="reader_0") reader_0.get_party_instance( role='guest', party_id=guest).component_param(table=guest_train_data) reader_0.get_party_instance( role='host', party_id=host).component_param(table=host_train_data) dataio_0.get_party_instance(role='guest', party_id=guest).component_param( with_label=True, output_format="dense") dataio_0.get_party_instance(role='host', party_id=host).component_param( with_label=True, output_format="dense") homo_secureboost_0 = HomoSecureBoost( name="homo_secureboost_0", num_trees=3, task_type='classification', objective_param={"objective": "cross_entropy"}, tree_param={"max_depth": 3}, cv_param={ "need_cv": True, "shuffle": False, "n_splits": 5 }) pipeline.add_component(reader_0) pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data)) pipeline.add_component(homo_secureboost_0, data=Data(train_data=dataio_0.output.data)) pipeline.compile() job_parameters = JobParameters(backend=backend, work_mode=work_mode) pipeline.fit(job_parameters)
def main(config="../../config.yaml", namespace=""): # obtain config if isinstance(config, str): config = load_job_config(config) parties = config.parties guest = parties.guest[0] host = parties.host[0] arbiter = parties.arbiter[0] guest_train_data = { "name": "breast_homo_guest", "namespace": f"experiment{namespace}" } host_train_data = { "name": "breast_homo_host", "namespace": f"experiment{namespace}" } pipeline = PipeLine().set_initiator( role='guest', party_id=guest).set_roles(guest=guest, host=host, arbiter=arbiter) data_transform_0 = DataTransform(name="data_transform_0") reader_0 = Reader(name="reader_0") reader_0.get_party_instance( role='guest', party_id=guest).component_param(table=guest_train_data) reader_0.get_party_instance( role='host', party_id=host).component_param(table=host_train_data) data_transform_0.get_party_instance( role='guest', party_id=guest).component_param(with_label=True, output_format="dense", label_type="float") data_transform_0.get_party_instance( role='host', party_id=host).component_param(with_label=True, output_format="dense", label_type="float") homo_secureboost_0 = HomoSecureBoost(name="homo_secureboost_0", num_trees=3, task_type='regression', objective_param={"objective": "lse"}, tree_param={"max_depth": 3}, cv_param={ "need_cv": True, "shuffle": False, "n_splits": 5 }) pipeline.add_component(reader_0) pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data)) pipeline.add_component(homo_secureboost_0, data=Data(train_data=data_transform_0.output.data)) pipeline.compile() pipeline.fit()
def main(config="../../config.yaml", namespace=""): # obtain config if isinstance(config, str): config = load_job_config(config) parties = config.parties guest = parties.guest[0] host = parties.host[0] arbiter = parties.arbiter[0] backend = config.backend work_mode = config.work_mode guest_train_data = {"name": "vehicle_scale_homo_guest", "namespace": f"experiment{namespace}"} guest_validate_data = {"name": "vehicle_scale_homo_test", "namespace": f"experiment{namespace}"} host_train_data = {"name": "vehicle_scale_homo_host", "namespace": f"experiment{namespace}"} host_validate_data = {"name": "vehicle_scale_homo_test", "namespace": f"experiment{namespace}"} pipeline = PipeLine().set_initiator(role='guest', party_id=guest).set_roles(guest=guest, host=host, arbiter=arbiter) dataio_0, dataio_1 = DataIO(name="dataio_0"), DataIO(name='dataio_1') reader_0, reader_1 = Reader(name="reader_0"), Reader(name='reader_1') reader_0.get_party_instance(role='guest', party_id=guest).algorithm_param(table=guest_train_data) reader_0.get_party_instance(role='host', party_id=host).algorithm_param(table=host_train_data) dataio_0.get_party_instance(role='guest', party_id=guest).algorithm_param(with_label=True, output_format="dense") dataio_0.get_party_instance(role='host', party_id=host).algorithm_param(with_label=True, output_format="dense") reader_1.get_party_instance(role='guest', party_id=guest).algorithm_param(table=guest_validate_data) reader_1.get_party_instance(role='host', party_id=host).algorithm_param(table=host_validate_data) dataio_1.get_party_instance(role='guest', party_id=guest).algorithm_param(with_label=True, output_format="dense") dataio_1.get_party_instance(role='host', party_id=host).algorithm_param(with_label=True, output_format="dense") homo_secureboost_0 = HomoSecureBoost(name="homo_secureboost_0", num_trees=3, task_type='classification', objective_param={"objective": "cross_entropy"}, tree_param={ "max_depth": 3 }, validation_freqs=1 ) evaluation_0 = Evaluation(name='evaluation_0', eval_type='multi') pipeline.add_component(reader_0) pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data)) pipeline.add_component(reader_1) pipeline.add_component(dataio_1, data=Data(data=reader_1.output.data), model=Model(dataio_0.output.model)) pipeline.add_component(homo_secureboost_0, data=Data(train_data=dataio_0.output.data, validate_data=dataio_1.output.data )) pipeline.add_component(evaluation_0, data=Data(homo_secureboost_0.output.data)) pipeline.compile() pipeline.fit(backend=backend, work_mode=work_mode)
def make_normal_dsl(config, namespace): parties = config.parties guest = parties.guest[0] hosts = parties.host[0] arbiter = parties.arbiter[0] guest_train_data = {"name": "breast_homo_guest", "namespace": f"experiment{namespace}"} host_train_data = {"name": "breast_homo_host", "namespace": f"experiment{namespace}"} # initialize pipeline pipeline = PipeLine() # set job initiator pipeline.set_initiator(role='guest', party_id=guest) # set participants information pipeline.set_roles(guest=guest, host=hosts, arbiter=arbiter) # define Reader components to read in data reader_0 = Reader(name="reader_0") # configure Reader for guest reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data) # configure Reader for host reader_0.get_party_instance(role='host', party_id=hosts).component_param(table=host_train_data) # define DataIO components dataio_0 = DataIO(name="dataio_0") # start component numbering at 0 # get DataIO party instance of guest dataio_0_guest_party_instance = dataio_0.get_party_instance(role='guest', party_id=guest) # configure DataIO for guest dataio_0_guest_party_instance.component_param(with_label=True, output_format="dense") # get and configure DataIO party instance of host dataio_0.get_party_instance(role='host', party_id=hosts).component_param(with_label=True) scale_0 = FeatureScale(name='scale_0') homo_sbt_0 = HomoSecureBoost(name="homo_secureboost_0", num_trees=3, task_type='classification', objective_param={"objective": "cross_entropy"}, tree_param={ "max_depth": 3 }, validation_freqs=1 ) # define Intersection components pipeline.add_component(reader_0) pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data)) pipeline.add_component(scale_0, data=Data(data=dataio_0.output.data)) pipeline.add_component(homo_sbt_0, data=Data(train_data=scale_0.output.data)) selection_param = { "name": "hetero_feature_selection_0", "select_col_indexes": -1, "select_names": [], "filter_methods": [ "homo_sbt_filter" ], "sbt_param": { "metrics": "feature_importance", "filter_type": "threshold", "take_high": True, "threshold": 0.03 }} feature_selection_0 = HeteroFeatureSelection(**selection_param) param = { "penalty": "L2", "optimizer": "sgd", "tol": 1e-05, "alpha": 0.01, "max_iter": 30, "early_stop": "diff", "batch_size": -1, "learning_rate": 0.15, "decay": 1, "decay_sqrt": True, "init_param": { "init_method": "zeros" }, "encrypt_param": { "method": None }, "cv_param": { "n_splits": 4, "shuffle": True, "random_seed": 33, "need_cv": False } } homo_lr_0 = HomoLR(name='homo_lr_0', **param) pipeline.add_component(feature_selection_0, data=Data(data=scale_0.output.data), model=Model(isometric_model=homo_sbt_0.output.model)) pipeline.add_component(homo_lr_0, data=Data(train_data=feature_selection_0.output.data)) evaluation_0 = Evaluation(name='evaluation_0') pipeline.add_component(evaluation_0, data=Data(data=homo_lr_0.output.data)) # compile pipeline once finished adding modules, this step will form conf and dsl files for running job pipeline.compile() return pipeline
# # Copyright 2019 The FATE Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # from pipeline.component.homo_secureboost import HomoSecureBoost a = HomoSecureBoost(name="homo_secureboost_0") print(a.output.data) print(a.output.model)
def main(config="../../config.yaml", param='./xgb_config_binary.yaml', namespace=""): # obtain config if isinstance(config, str): config = load_job_config(config) if isinstance(param, str): param = JobConfig.load_from_file(param) parties = config.parties guest = parties.guest[0] host = parties.host[0] arbiter = parties.arbiter[0] backend = config.backend work_mode = config.work_mode guest_train_data = { "name": param['data_guest_train'], "namespace": f"experiment{namespace}" } guest_validate_data = { "name": param['data_guest_val'], "namespace": f"experiment{namespace}" } host_train_data = { "name": param['data_host_train'], "namespace": f"experiment{namespace}" } host_validate_data = { "name": param['data_host_val'], "namespace": f"experiment{namespace}" } pipeline = PipeLine().set_initiator( role='guest', party_id=guest).set_roles(guest=guest, host=host, arbiter=arbiter) dataio_0, dataio_1 = DataIO(name="dataio_0"), DataIO(name='dataio_1') reader_0, reader_1 = Reader(name="reader_0"), Reader(name='reader_1') reader_0.get_party_instance( role='guest', party_id=guest).component_param(table=guest_train_data) reader_0.get_party_instance( role='host', party_id=host).component_param(table=host_train_data) dataio_0.get_party_instance(role='guest', party_id=guest).component_param( with_label=True, output_format="dense") dataio_0.get_party_instance(role='host', party_id=host).component_param( with_label=True, output_format="dense") reader_1.get_party_instance( role='guest', party_id=guest).component_param(table=guest_validate_data) reader_1.get_party_instance( role='host', party_id=host).component_param(table=host_validate_data) dataio_1.get_party_instance(role='guest', party_id=guest).component_param( with_label=True, output_format="dense") dataio_1.get_party_instance(role='host', party_id=host).component_param( with_label=True, output_format="dense") homo_secureboost_0 = HomoSecureBoost( name="homo_secureboost_0", num_trees=param['tree_num'], task_type=param['task_type'], objective_param={"objective": param['loss_func']}, tree_param={"max_depth": param['tree_depth']}, validation_freqs=1, subsample_feature_rate=1, learning_rate=param['learning_rate'], bin_num=50) evaluation_0 = Evaluation(name='evaluation_0', eval_type=param['eval_type']) pipeline.add_component(reader_0) pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data)) pipeline.add_component(reader_1) pipeline.add_component(dataio_1, data=Data(data=reader_1.output.data), model=Model(dataio_0.output.model)) pipeline.add_component(homo_secureboost_0, data=Data(train_data=dataio_0.output.data, validate_data=dataio_1.output.data)) pipeline.add_component(evaluation_0, data=Data(homo_secureboost_0.output.data)) pipeline.compile() job_parameters = JobParameters(backend=backend, work_mode=work_mode) pipeline.fit(job_parameters) data_summary = { "train": { "guest": guest_train_data["name"], "host": host_train_data["name"] }, "test": { "guest": guest_train_data["name"], "host": host_train_data["name"] } } return data_summary, pipeline.get_component('evaluation_0').get_summary()
def main(config="../../config.yaml", param='./xgb_config_binary.yaml', namespace=""): # obtain config if isinstance(config, str): config = load_job_config(config) if isinstance(param, str): param = JobConfig.load_from_file(param) parties = config.parties guest = parties.guest[0] host = parties.host[0] arbiter = parties.arbiter[0] guest_train_data = { "name": param['data_guest_train'], "namespace": f"experiment{namespace}" } guest_validate_data = { "name": param['data_guest_val'], "namespace": f"experiment{namespace}" } host_train_data = { "name": param['data_host_train'], "namespace": f"experiment{namespace}" } host_validate_data = { "name": param['data_host_val'], "namespace": f"experiment{namespace}" } pipeline = PipeLine().set_initiator( role='guest', party_id=guest).set_roles(guest=guest, host=host, arbiter=arbiter) dataio_0, dataio_1 = DataIO(name="dataio_0"), DataIO(name='dataio_1') reader_0, reader_1 = Reader(name="reader_0"), Reader(name='reader_1') reader_0.get_party_instance( role='guest', party_id=guest).component_param(table=guest_train_data) reader_0.get_party_instance( role='host', party_id=host).component_param(table=host_train_data) dataio_0.get_party_instance(role='guest', party_id=guest).component_param( with_label=True, output_format="dense") dataio_0.get_party_instance(role='host', party_id=host).component_param( with_label=True, output_format="dense") reader_1.get_party_instance( role='guest', party_id=guest).component_param(table=guest_validate_data) reader_1.get_party_instance( role='host', party_id=host).component_param(table=host_validate_data) dataio_1.get_party_instance(role='guest', party_id=guest).component_param( with_label=True, output_format="dense") dataio_1.get_party_instance(role='host', party_id=host).component_param( with_label=True, output_format="dense") homo_secureboost_0 = HomoSecureBoost( name="homo_secureboost_0", num_trees=param['tree_num'], task_type=param['task_type'], objective_param={"objective": param['loss_func']}, tree_param={"max_depth": param['tree_depth']}, validation_freqs=1, subsample_feature_rate=1, learning_rate=param['learning_rate'], bin_num=50) homo_secureboost_1 = HomoSecureBoost(name="homo_secureboost_1") evaluation_0 = Evaluation(name='evaluation_0', eval_type=param['eval_type']) pipeline.add_component(reader_0) pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data)) pipeline.add_component(reader_1) pipeline.add_component(dataio_1, data=Data(data=reader_1.output.data), model=Model(dataio_0.output.model)) pipeline.add_component(homo_secureboost_0, data=Data(train_data=dataio_0.output.data, validate_data=dataio_1.output.data)) pipeline.add_component(homo_secureboost_1, data=Data(test_data=dataio_1.output.data), model=Model(homo_secureboost_0.output.model)) pipeline.add_component(evaluation_0, data=Data(homo_secureboost_0.output.data)) pipeline.compile() pipeline.fit() sbt_0_data = pipeline.get_component( "homo_secureboost_0").get_output_data().get("data") sbt_1_data = pipeline.get_component( "homo_secureboost_1").get_output_data().get("data") sbt_0_score = extract_data(sbt_0_data, "predict_result") sbt_0_label = extract_data(sbt_0_data, "label") sbt_1_score = extract_data(sbt_1_data, "predict_result") sbt_1_label = extract_data(sbt_1_data, "label") sbt_0_score_label = extract_data(sbt_0_data, "predict_result", keep_id=True) sbt_1_score_label = extract_data(sbt_1_data, "predict_result", keep_id=True) metric_summary = parse_summary_result( pipeline.get_component("evaluation_0").get_summary()) if param['eval_type'] == "regression": desc_sbt_0 = regression_metric.Describe().compute(sbt_0_score) desc_sbt_1 = regression_metric.Describe().compute(sbt_1_score) metric_summary["script_metrics"] = { "sbt_train": desc_sbt_0, "sbt_validate": desc_sbt_1 } elif param['eval_type'] == "binary": metric_sbt = { "score_diversity_ratio": classification_metric.Distribution.compute(sbt_0_score_label, sbt_1_score_label), "ks_2samp": classification_metric.KSTest.compute(sbt_0_score, sbt_1_score), "mAP_D_value": classification_metric.AveragePrecisionScore().compute( sbt_0_score, sbt_1_score, sbt_0_label, sbt_1_label) } metric_summary["distribution_metrics"] = {"homo_sbt": metric_sbt} elif param['eval_type'] == "multi": metric_sbt = { "score_diversity_ratio": classification_metric.Distribution.compute(sbt_0_score_label, sbt_1_score_label) } metric_summary["distribution_metrics"] = {"homo_sbt": metric_sbt} data_summary = { "train": { "guest": guest_train_data["name"], "host": host_train_data["name"] }, "test": { "guest": guest_validate_data["name"], "host": host_validate_data["name"] } } return data_summary, metric_summary
def main(config="../../config.yaml", namespace=""): # obtain config if isinstance(config, str): config = load_job_config(config) parties = config.parties guest = parties.guest[0] host = parties.host[0] arbiter = parties.arbiter[0] guest_train_data = { "name": "breast_homo_guest", "namespace": f"experiment{namespace}" } guest_validate_data = { "name": "breast_homo_test", "namespace": f"experiment{namespace}" } host_train_data = { "name": "breast_homo_host", "namespace": f"experiment{namespace}" } host_validate_data = { "name": "breast_homo_test", "namespace": f"experiment{namespace}" } pipeline = PipeLine().set_initiator( role='guest', party_id=guest).set_roles(guest=guest, host=host, arbiter=arbiter) data_transform_0, data_transform_1 = DataTransform( name="data_transform_0"), DataTransform(name='data_transform_1') reader_0, reader_1 = Reader(name="reader_0"), Reader(name='reader_1') reader_0.get_party_instance( role='guest', party_id=guest).component_param(table=guest_train_data) reader_0.get_party_instance( role='host', party_id=host).component_param(table=host_train_data) data_transform_0.get_party_instance( role='guest', party_id=guest).component_param(with_label=True, output_format="dense") data_transform_0.get_party_instance( role='host', party_id=host).component_param(with_label=True, output_format="dense") reader_1.get_party_instance( role='guest', party_id=guest).component_param(table=guest_validate_data) reader_1.get_party_instance( role='host', party_id=host).component_param(table=host_validate_data) data_transform_1.get_party_instance( role='guest', party_id=guest).component_param(with_label=True, output_format="dense") data_transform_1.get_party_instance( role='host', party_id=host).component_param(with_label=True, output_format="dense") homo_secureboost_0 = HomoSecureBoost( name="homo_secureboost_0", num_trees=3, task_type='classification', objective_param={"objective": "cross_entropy"}, tree_param={"max_depth": 3}, validation_freqs=1) evaluation_0 = Evaluation(name='evaluation_0', eval_type='binary') pipeline.add_component(reader_0) pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data)) pipeline.add_component(reader_1) pipeline.add_component(data_transform_1, data=Data(data=reader_1.output.data), model=Model(data_transform_0.output.model)) pipeline.add_component(homo_secureboost_0, data=Data( train_data=data_transform_0.output.data, validate_data=data_transform_1.output.data)) pipeline.add_component(evaluation_0, data=Data(homo_secureboost_0.output.data)) pipeline.compile() pipeline.fit() # predict # deploy required components pipeline.deploy_component([data_transform_0, homo_secureboost_0]) predict_pipeline = PipeLine() # add data reader onto predict pipeline predict_pipeline.add_component(reader_1) # add selected components from train pipeline onto predict pipeline # specify data source predict_pipeline.add_component( pipeline, data=Data(predict_input={ pipeline.data_transform_0.input.data: reader_1.output.data })) # run predict model predict_pipeline.predict()