import json

# Imports follow the FATE 1.5+ pipeline package layout.
from pipeline.backend.pipeline import PipeLine
from pipeline.component import DataIO, Evaluation, FeatureScale, HomoLR, Reader
from pipeline.interface import Data
from pipeline.runtime.entity import JobParameters
from pipeline.utils.tools import load_job_config


def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host
    arbiter = parties.arbiter[0]
    backend = config.backend
    work_mode = config.work_mode

    guest_train_data = {"name": "breast_homo_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "breast_homo_host", "namespace": f"experiment{namespace}"}

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    # configure Reader for guest
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    # configure Reader for host
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    # define DataIO component; start component numbering at 0
    dataio_0 = DataIO(name="dataio_0", with_label=True, output_format="dense")

    scale_0 = FeatureScale(name='scale_0')

    param = {
        "penalty": "L2",
        "optimizer": "sgd",
        "tol": 1e-05,
        "alpha": 0.01,
        "max_iter": 30,
        "early_stop": "diff",
        "batch_size": -1,
        "learning_rate": 0.15,
        "decay": 1,
        "decay_sqrt": True,
        "init_param": {
            "init_method": "zeros"
        },
        "encrypt_param": {
            "method": None
        },
        "cv_param": {
            "n_splits": 4,
            "shuffle": True,
            "random_seed": 33,
            "need_cv": False
        }
    }

    homo_lr_0 = HomoLR(name='homo_lr_0', **param)

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    # set data input source of the feature-scale component
    pipeline.add_component(scale_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(homo_lr_0, data=Data(train_data=scale_0.output.data))

    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")
    # evaluation runs on guest only; skip it on host
    evaluation_0.get_party_instance(role='host', party_id=host).component_param(need_run=False)
    pipeline.add_component(evaluation_0, data=Data(data=homo_lr_0.output.data))

    # compile pipeline once finished adding modules; this step forms the conf and dsl files for running the job
    pipeline.compile()

    # fit model
    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)

    # query component summary
    print(json.dumps(pipeline.get_component("homo_lr_0").get_summary(),
                     indent=4, ensure_ascii=False))
    print(json.dumps(pipeline.get_component("evaluation_0").get_summary(),
                     indent=4, ensure_ascii=False))
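# A typical command-line entry point for these FATE demo scripts (a sketch
# following the convention of the FATE pipeline examples; the "-config"
# argparse wiring is an assumption, not part of the original snippet):
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser("PIPELINE DEMO")
    parser.add_argument("-config", type=str, help="config file")
    args = parser.parse_args()
    if args.config is not None:
        main(args.config)
    else:
        main()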
#
#  Copyright 2019 The FATE Authors. All Rights Reserved.
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

from pipeline.component.homo_lr import HomoLR

a = HomoLR(name="homo_lr_0")
print(a.output.data)
print(a.output.model)
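# The two attributes printed above are the handles used to wire a component
# into a pipeline: `output.data` feeds downstream Data inputs and
# `output.model` feeds downstream Model inputs, e.g. (a hypothetical sketch;
# component names here are illustrative):
#
#   pipeline.add_component(evaluation_0, data=Data(data=a.output.data))
#   pipeline.add_component(homo_lr_1, model=Model(a.output.model))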
# Imports follow the FATE 1.5+ pipeline package layout.
from pipeline.backend.pipeline import PipeLine
from pipeline.component import (DataIO, Evaluation, FeatureScale,
                                HeteroFeatureSelection, HomoLR,
                                HomoSecureBoost, Reader)
from pipeline.interface import Data, Model


def make_normal_dsl(config, namespace):
    parties = config.parties
    guest = parties.guest[0]
    hosts = parties.host[0]
    arbiter = parties.arbiter[0]
    guest_train_data = {"name": "breast_homo_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "breast_homo_host", "namespace": f"experiment{namespace}"}

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=hosts, arbiter=arbiter)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    # configure Reader for guest
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    # configure Reader for host
    reader_0.get_party_instance(role='host', party_id=hosts).component_param(table=host_train_data)

    # define DataIO component; start component numbering at 0
    dataio_0 = DataIO(name="dataio_0")
    # get DataIO party instance of guest
    dataio_0_guest_party_instance = dataio_0.get_party_instance(role='guest', party_id=guest)
    # configure DataIO for guest
    dataio_0_guest_party_instance.component_param(with_label=True, output_format="dense")
    # get and configure DataIO party instance of host
    dataio_0.get_party_instance(role='host', party_id=hosts).component_param(with_label=True)

    scale_0 = FeatureScale(name='scale_0')
    homo_sbt_0 = HomoSecureBoost(name="homo_secureboost_0",
                                 num_trees=3,
                                 task_type='classification',
                                 objective_param={"objective": "cross_entropy"},
                                 tree_param={"max_depth": 3},
                                 validation_freqs=1)

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(scale_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(homo_sbt_0, data=Data(train_data=scale_0.output.data))

    selection_param = {
        "name": "hetero_feature_selection_0",
        "select_col_indexes": -1,
        "select_names": [],
        "filter_methods": [
            "homo_sbt_filter"
        ],
        "sbt_param": {
            "metrics": "feature_importance",
            "filter_type": "threshold",
            "take_high": True,
            "threshold": 0.03
        }
    }
    feature_selection_0 = HeteroFeatureSelection(**selection_param)

    param = {
        "penalty": "L2",
        "optimizer": "sgd",
        "tol": 1e-05,
        "alpha": 0.01,
        "max_iter": 30,
        "early_stop": "diff",
        "batch_size": -1,
        "learning_rate": 0.15,
        "decay": 1,
        "decay_sqrt": True,
        "init_param": {
            "init_method": "zeros"
        },
        "encrypt_param": {
            "method": None
        },
        "cv_param": {
            "n_splits": 4,
            "shuffle": True,
            "random_seed": 33,
            "need_cv": False
        }
    }
    homo_lr_0 = HomoLR(name='homo_lr_0', **param)

    # feature selection consumes the scaled data and takes the secureboost
    # model's feature importances as its isometric model
    pipeline.add_component(feature_selection_0,
                           data=Data(data=scale_0.output.data),
                           model=Model(isometric_model=homo_sbt_0.output.model))
    pipeline.add_component(homo_lr_0, data=Data(train_data=feature_selection_0.output.data))

    evaluation_0 = Evaluation(name='evaluation_0')
    pipeline.add_component(evaluation_0, data=Data(data=homo_lr_0.output.data))

    # compile pipeline once finished adding modules; this step forms the conf and dsl files for running the job
    pipeline.compile()
    return pipeline
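# A minimal runner sketch for make_normal_dsl, reusing the load_job_config and
# JobParameters helpers from the other examples in this section; the wrapper
# name `run_normal_dsl` is illustrative, not part of the original file.
def run_normal_dsl(config="../../config.yaml", namespace=""):
    from pipeline.runtime.entity import JobParameters
    from pipeline.utils.tools import load_job_config

    if isinstance(config, str):
        config = load_job_config(config)
    pipeline = make_normal_dsl(config, namespace)
    job_parameters = JobParameters(backend=config.backend, work_mode=config.work_mode)
    pipeline.fit(job_parameters)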
import json

# Imports follow the FATE 1.5+ pipeline package layout; the deprecated
# algorithm_param calls below have been normalized to component_param for
# consistency with the other examples.
from pipeline.backend.pipeline import PipeLine
from pipeline.component import (DataIO, Evaluation, FeatureScale, HomoLR,
                                HomoOneHotEncoder, Reader)
from pipeline.interface import Data, Model
from pipeline.runtime.entity import JobParameters
from pipeline.utils.tools import load_job_config


def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]
    backend = config.backend
    work_mode = config.work_mode

    guest_train_data = {"name": "heart_nonscaled_hetero_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "heart_nonscaled_hetero_host", "namespace": f"experiment{namespace}"}
    guest_eval_data = {"name": "heart_nonscaled_hetero_test", "namespace": f"experiment{namespace}"}
    host_eval_data = {"name": "heart_nonscaled_hetero_test", "namespace": f"experiment{namespace}"}

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    # configure Reader for guest
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    # configure Reader for host
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    reader_1 = Reader(name="reader_1")
    reader_1.get_party_instance(role='guest', party_id=guest).component_param(table=guest_eval_data)
    reader_1.get_party_instance(role='host', party_id=host).component_param(table=host_eval_data)

    # define DataIO components; start component numbering at 0
    dataio_0 = DataIO(name="dataio_0", with_label=True, output_format="dense", label_name='target')
    dataio_1 = DataIO(name="dataio_1")

    homo_onehot_param = {
        "transform_col_indexes": [1, 2, 5, 6, 8, 10, 11, 12],
        "transform_col_names": [],
        "need_alignment": True
    }
    homo_onehot_0 = HomoOneHotEncoder(name='homo_onehot_0', **homo_onehot_param)
    homo_onehot_1 = HomoOneHotEncoder(name='homo_onehot_1')

    scale_0 = FeatureScale(name='scale_0', method="standard_scale")
    scale_1 = FeatureScale(name='scale_1')

    homo_lr_param = {
        "penalty": "L2",
        "optimizer": "sgd",
        "tol": 1e-05,
        "alpha": 0.01,
        "max_iter": 3,
        "early_stop": "diff",
        "batch_size": 500,
        "learning_rate": 0.15,
        "decay": 1,
        "decay_sqrt": True,
        "init_param": {
            "init_method": "zeros"
        },
        "encrypt_param": {
            "method": "Paillier"
        },
        "cv_param": {
            "n_splits": 4,
            "shuffle": True,
            "random_seed": 33,
            "need_cv": False
        }
    }
    homo_lr_0 = HomoLR(name='homo_lr_0', **homo_lr_param)
    homo_lr_1 = HomoLR(name='homo_lr_1')

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    # set dataio_1 to replicate model from dataio_0
    pipeline.add_component(dataio_1, data=Data(data=reader_1.output.data),
                           model=Model(dataio_0.output.model))
    pipeline.add_component(homo_onehot_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(homo_onehot_1, data=Data(data=dataio_1.output.data),
                           model=Model(homo_onehot_0.output.model))
    pipeline.add_component(scale_0, data=Data(data=homo_onehot_0.output.data))
    pipeline.add_component(scale_1, data=Data(data=homo_onehot_1.output.data),
                           model=Model(scale_0.output.model))
    pipeline.add_component(homo_lr_0, data=Data(train_data=scale_0.output.data))
    # homo_lr_1 predicts on the evaluation set with the model trained by homo_lr_0
    pipeline.add_component(homo_lr_1, data=Data(test_data=scale_1.output.data),
                           model=Model(homo_lr_0.output.model))

    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")
    # evaluation runs on guest only; skip it on host
    evaluation_0.get_party_instance(role='host', party_id=host).component_param(need_run=False)
    pipeline.add_component(evaluation_0,
                           data=Data(data=[homo_lr_0.output.data, homo_lr_1.output.data]))

    # compile pipeline once finished adding modules; this step forms the conf and dsl files for running the job
    pipeline.compile()

    # fit model
    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)

    # query component summary
    print(json.dumps(pipeline.get_component("homo_lr_0").get_summary(),
                     indent=4, ensure_ascii=False))
    print(json.dumps(pipeline.get_component("evaluation_0").get_summary(),
                     indent=4, ensure_ascii=False))
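# The same argparse-style entry point sketched after the first example applies
# here as well; a minimal guard (an illustrative addition, not in the original):
if __name__ == "__main__":
    main()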