def main(config="../../config.yaml", namespace=""):
    """Run a hetero Poisson regression training job, then reuse the trained
    components for prediction.

    :param config: path to a job config yaml, or an already-loaded config object
    :param namespace: suffix appended to the data namespace (for test isolation)
    """
    # load config from file if a path string was given
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    guest_train_data = {"name": "dvisits_hetero_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "dvisits_hetero_host", "namespace": f"experiment{namespace}"}

    # build pipeline with guest as initiator
    pipeline = PipeLine().set_initiator(role='guest', party_id=guest).set_roles(
        guest=guest, host=host, arbiter=arbiter)

    # per-party reader configuration
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    # only the guest holds the label ("doctorco", a float count target)
    data_transform_0 = DataTransform(name="data_transform_0")
    data_transform_0.get_party_instance(role='guest', party_id=guest).component_param(
        with_label=True, label_name="doctorco", label_type="float", output_format="dense")
    data_transform_0.get_party_instance(role='host', party_id=host).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")
    hetero_poisson_0 = HeteroPoisson(
        name="hetero_poisson_0", early_stop="weight_diff", max_iter=10, alpha=100.0,
        batch_size=-1, learning_rate=0.01, optimizer="rmsprop",
        exposure_colname="exposure", decay_sqrt=False, tol=0.001,
        init_param={"init_method": "zeros"}, penalty="L2",
        encrypted_mode_calculator_param={"mode": "fast"})

    # regression metrics are computed on guest side only
    evaluation_0 = Evaluation(name="evaluation_0", eval_type="regression", pos_label=1)
    evaluation_0.get_party_instance(role='host', party_id=host).component_param(need_run=False)

    # wire components in execution order
    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(hetero_poisson_0, data=Data(train_data=intersection_0.output.data))
    pipeline.add_component(evaluation_0, data=Data(data=hetero_poisson_0.output.data))

    pipeline.compile()
    pipeline.fit()

    # prediction: deploy the trained components, feed them from the same reader
    pipeline.deploy_component([data_transform_0, intersection_0, hetero_poisson_0])
    predict_pipeline = PipeLine()
    predict_pipeline.add_component(reader_0)
    predict_pipeline.add_component(
        pipeline,
        data=Data(predict_input={pipeline.data_transform_0.input.data: reader_0.output.data}))
    predict_pipeline.predict()
def main(config="../../config.yaml", namespace=""):
    """Train a hetero federated transfer learning (FTL) model in plain mode,
    then run prediction with the deployed components.

    :param config: path to a job config yaml, or an already-loaded config object
    :param namespace: suffix appended to the data namespace
    """
    # load config from file if a path string was given
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    backend = config.backend
    work_mode = config.work_mode

    guest_train_data = {"name": "nus_wide_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "nus_wide_host", "namespace": f"experiment{namespace}"}

    # no arbiter is needed for FTL
    pipeline = PipeLine().set_initiator(role='guest', party_id=guest).set_roles(
        guest=guest, host=host)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).algorithm_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=host).algorithm_param(table=host_train_data)

    # only the guest has labels
    dataio_0 = DataIO(name="dataio_0")
    dataio_0.get_party_instance(role='guest', party_id=guest).algorithm_param(
        with_label=True, output_format="dense")
    dataio_0.get_party_instance(role='host', party_id=host).algorithm_param(with_label=False)

    # single dense layer as the local representation network
    hetero_ftl_0 = HeteroFTL(name='hetero_ftl_0', epochs=10, alpha=1, batch_size=-1, mode='plain')
    hetero_ftl_0.add_nn_layer(Dense(
        units=32, activation='sigmoid',
        kernel_initializer=initializers.RandomNormal(stddev=1.0, dtype="float32"),
        bias_initializer=initializers.Zeros()))
    hetero_ftl_0.compile(optimizer=optimizers.Adam(lr=0.01))

    evaluation_0 = Evaluation(name='evaluation_0', eval_type="binary")

    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(hetero_ftl_0, data=Data(train_data=dataio_0.output.data))
    pipeline.add_component(evaluation_0, data=Data(data=hetero_ftl_0.output.data))

    pipeline.compile()
    pipeline.fit(backend=backend, work_mode=work_mode)

    # prediction: deploy trained components and feed them from the reader
    pipeline.deploy_component([dataio_0, hetero_ftl_0])
    predict_pipeline = PipeLine()
    predict_pipeline.add_component(reader_0)
    predict_pipeline.add_component(
        pipeline,
        data=Data(predict_input={pipeline.dataio_0.input.data: reader_0.output.data}))
    predict_pipeline.predict(backend=backend, work_mode=work_mode)
def run_homo_nn_pipeline(config, namespace, data: dict, nn_component, num_host):
    """Train a homo NN pipeline with a caller-supplied NN component and a
    variable number of hosts, then run prediction.

    :param config: path to a job config yaml, or an already-loaded config object
    :param namespace: suffix appended to each table's namespace
    :param data: {"guest": table_dict, "host": [table_dict, ...]}
    :param nn_component: pre-built homo NN component (expected name "homo_nn_0")
    :param num_host: number of host parties to include
    """
    if isinstance(config, str):
        config = load_job_config(config)

    guest_train_data = data["guest"]
    host_train_data = data["host"][:num_host]
    # suffix every table namespace in place
    for table in [guest_train_data, *host_train_data]:
        table["namespace"] = f"{table['namespace']}{namespace}"

    hosts = config.parties.host[:num_host]
    pipeline = (
        PipeLine()
        .set_initiator(role="guest", party_id=config.parties.guest[0])
        .set_roles(guest=config.parties.guest[0], host=hosts,
                   arbiter=config.parties.arbiter))

    # one reader, configured per party
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(
        role="guest", party_id=config.parties.guest[0]).component_param(table=guest_train_data)
    for idx, host_party in enumerate(hosts):
        reader_0.get_party_instance(
            role="host", party_id=host_party).component_param(table=host_train_data[idx])

    # homo setting: every party holds labels
    dataio_0 = DataIO(name="dataio_0", with_label=True)
    dataio_0.get_party_instance(
        role="guest", party_id=config.parties.guest[0]).component_param(
        with_label=True, output_format="dense")
    dataio_0.get_party_instance(
        role="host", party_id=hosts).component_param(with_label=True)

    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(nn_component, data=Data(train_data=dataio_0.output.data))
    pipeline.compile()

    job_parameters = JobParameters(backend=config.backend, work_mode=config.work_mode)
    pipeline.fit(job_parameters)
    print(pipeline.get_component("homo_nn_0").get_summary())

    # prediction: reuse dataio and the NN component
    pipeline.deploy_component([dataio_0, nn_component])
    predict_pipeline = PipeLine()
    predict_pipeline.add_component(reader_0)
    predict_pipeline.add_component(
        pipeline,
        data=Data(predict_input={pipeline.dataio_0.input.data: reader_0.output.data}),
    )
    predict_pipeline.predict(job_parameters)
def main(config="../../config.yaml", namespace=""):
    """Run a hetero feature imputation pipeline, then reuse the fitted
    components for prediction.

    :param config: path to a job config yaml, or an already-loaded config object
    :param namespace: suffix appended to the data namespace
    """
    # load config from file if a path string was given
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]

    guest_train_data = {"name": "dvisits_hetero_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "dvisits_hetero_host", "namespace": f"experiment{namespace}"}

    pipeline = PipeLine().set_initiator(role='guest', party_id=guest).set_roles(
        guest=guest, host=host)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    data_transform_0 = DataTransform(name="data_transform_0", with_label=False)
    intersection_0 = Intersection(name="intersection_0")

    # impute 0-valued entries; guest overrides per-column fill methods
    feature_imputation_0 = FeatureImputation(
        name="feature_imputation_0", default_value=42, missing_impute=[0])
    feature_imputation_0.get_party_instance(role='guest', party_id=guest).component_param(
        col_missing_fill_method={"doctorco": "min", "hscore": "designated"})

    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(feature_imputation_0, data=Data(data=intersection_0.output.data))

    pipeline.compile()
    pipeline.fit()

    # prediction: deploy the fitted components and feed from the reader
    pipeline.deploy_component([data_transform_0, intersection_0, feature_imputation_0])
    predict_pipeline = PipeLine()
    predict_pipeline.add_component(reader_0)
    predict_pipeline.add_component(
        pipeline,
        data=Data(predict_input={pipeline.data_transform_0.input.data: reader_0.output.data}))
    predict_pipeline.predict()
def main(config="../../config.yaml", namespace=""):
    """Run hetero feature binning on dense and sparse transformed data in
    parallel branches, then reuse the dense branch for prediction.

    :param config: path to a job config yaml, or an already-loaded config object
    :param namespace: suffix appended to the data namespace
    """
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]

    guest_train_data = {
        "name": "ionosphere_scale_hetero_guest",
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": "ionosphere_scale_hetero_host",
        "namespace": f"experiment{namespace}"
    }

    pipeline = PipeLine().set_initiator(role='guest', party_id=guest).set_roles(
        guest=guest, host=host)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)

    # branch 0: dense output; branch 1: sparse output
    data_transform_0 = DataTransform(name="data_transform_0", label_name="label")
    data_transform_0.get_party_instance(
        role='guest', party_id=guest).component_param(with_label=True)
    data_transform_0.get_party_instance(
        role='host', party_id=host).component_param(with_label=False)
    data_transform_1 = DataTransform(name="data_transform_1",
                                     output_format="sparse", label_name="label")
    data_transform_1.get_party_instance(
        role='guest', party_id=guest).component_param(with_label=True)
    data_transform_1.get_party_instance(
        role='host', party_id=host).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")
    intersection_1 = Intersection(name="intersection_1")

    # shared quantile-binning parameters for both branches
    param = {
        "method": "quantile",
        "compress_thres": 10000,
        "head_size": 10000,
        "error": 0.001,
        "bin_num": 10,
        "bin_indexes": -1,
        "bin_names": None,
        "category_indexes": None,
        "category_names": None,
        "adjustment_factor": 0.5,
        "local_only": False,
        "transform_param": {
            "transform_cols": [0, 1, 2],
            "transform_names": None,
            "transform_type": "bin_num"
        }
    }

    hetero_feature_binning_0 = HeteroFeatureBinning(
        name="hetero_feature_binning_0", **param)
    hetero_feature_binning_0.get_party_instance(
        role="host", party_id=host).component_param(
        transform_param={"transform_type": None})
    hetero_feature_binning_1 = HeteroFeatureBinning(
        name="hetero_feature_binning_1", **param)
    # BUG FIX: the host-side override below was previously applied to
    # hetero_feature_binning_0 a second time, leaving binning_1 unconfigured.
    hetero_feature_binning_1.get_party_instance(
        role="host", party_id=host).component_param(
        transform_param={"transform_type": None})

    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(hetero_feature_binning_0,
                           data=Data(data=intersection_0.output.data))
    pipeline.add_component(data_transform_1, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_1, data=Data(data=data_transform_1.output.data))
    pipeline.add_component(hetero_feature_binning_1,
                           data=Data(data=intersection_1.output.data))

    pipeline.compile()
    pipeline.fit()

    # prediction uses the dense branch only
    pipeline.deploy_component(
        [data_transform_0, intersection_0, hetero_feature_binning_0])
    predict_pipeline = PipeLine()
    # add data reader onto predict pipeline
    predict_pipeline.add_component(reader_0)
    # add selected components from train pipeline onto predict pipeline,
    # specifying the data source
    predict_pipeline.add_component(
        pipeline,
        data=Data(predict_input={
            pipeline.data_transform_0.input.data: reader_0.output.data
        }))
    # run predict model
    predict_pipeline.predict()
def main(config="../../config.yaml", namespace=""):
    """Run hetero Pearson correlation + binning, then feature selection using
    both as isometric models, and finally prediction.

    :param config: path to a job config yaml, or an already-loaded config object
    :param namespace: suffix appended to the data namespace
    """
    # load config from file if a path string was given
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    backend = config.backend
    work_mode = config.work_mode

    guest_train_data = {
        "name": "breast_hetero_guest",
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": "breast_hetero_host",
        "namespace": f"experiment{namespace}"
    }

    pipeline = PipeLine().set_initiator(role='guest', party_id=guest).set_roles(
        guest=guest, host=host)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)

    # only the guest holds labels
    dataio_0 = DataIO(name="dataio_0")
    dataio_0.get_party_instance(
        role='guest', party_id=guest).component_param(with_label=True)
    dataio_0.get_party_instance(
        role='host', party_id=host).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")
    hetero_pearson_0 = HeteroPearson(name='hetero_pearson_0', column_indexes=-1)
    hetero_binning_0 = HeteroFeatureBinning(name='hetero_binning_0')

    # selection filters driven by VIF and correlation thresholds
    selection_param = {
        "name": "hetero_feature_selection_0",
        "select_col_indexes": -1,
        "select_names": [],
        "filter_methods": ["vif_filter", "correlation_filter"],
        "vif_param": {
            "threshold": 5
        },
        "correlation_param": {
            "threshold": 0.5,
            "select_federated": False
        }
    }
    hetero_feature_selection_0 = HeteroFeatureSelection(**selection_param)

    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(hetero_binning_0, data=Data(data=intersection_0.output.data))
    pipeline.add_component(hetero_pearson_0, data=Data(data=intersection_0.output.data))
    # selection consumes both upstream models as isometric inputs
    pipeline.add_component(
        hetero_feature_selection_0,
        data=Data(data=intersection_0.output.data),
        model=Model(isometric_model=[
            hetero_pearson_0.output.model, hetero_binning_0.output.model
        ]))

    pipeline.compile()
    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)

    # prediction: deploy the fitted components and feed from the reader
    pipeline.deploy_component([
        dataio_0, intersection_0, hetero_binning_0, hetero_feature_selection_0
    ])
    predict_pipeline = PipeLine()
    predict_pipeline.add_component(reader_0)
    predict_pipeline.add_component(
        pipeline,
        data=Data(predict_input={pipeline.dataio_0.input.data: reader_0.output.data}))
    predict_pipeline.predict(job_parameters)
def main(config="../../config.yaml", namespace=""):
    """Train a homo logistic regression pipeline with feature scaling and
    evaluation, run prediction, and export the predict DSL/conf as JSON.

    :param config: path to a job config yaml, or an already-loaded config object
    :param namespace: suffix appended to the data namespace
    """
    # BUG FIX: json was used below but its import had been commented out.
    import json

    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]
    backend = config.backend
    work_mode = config.work_mode

    guest_train_data = {"name": "breast_homo_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "breast_homo_host", "namespace": f"experiment{namespace}"}

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    # configure Reader for guest
    reader_0.get_party_instance(role='guest', party_id=guest).algorithm_param(table=guest_train_data)
    # configure Reader for host
    reader_0.get_party_instance(role='host', party_id=host).algorithm_param(table=host_train_data)

    # define DataIO components; homo setting: all parties hold labels
    dataio_0 = DataIO(name="dataio_0", with_label=True, output_format="dense")
    scale_0 = FeatureScale(name='scale_0')

    param = {
        "penalty": "L2",
        "optimizer": "sgd",
        "tol": 1e-05,
        "alpha": 0.01,
        "max_iter": 30,
        "early_stop": "diff",
        "batch_size": -1,
        "learning_rate": 0.15,
        "decay": 1,
        "decay_sqrt": True,
        "init_param": {
            "init_method": "zeros"
        },
        "encrypt_param": {
            "method": None
        },
        "cv_param": {
            "n_splits": 4,
            "shuffle": True,
            "random_seed": 33,
            "need_cv": False
        }
    }
    homo_lr_0 = HomoLR(name='homo_lr_0', **param)

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(scale_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(homo_lr_0, data=Data(train_data=scale_0.output.data))

    # evaluation runs only on guest
    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")
    evaluation_0.get_party_instance(role='host', party_id=host).algorithm_param(need_run=False)
    pipeline.add_component(evaluation_0, data=Data(data=homo_lr_0.output.data))

    # compile pipeline once finished adding modules; forms conf and dsl files
    pipeline.compile()
    # fit model
    pipeline.fit(backend=backend, work_mode=work_mode)

    deploy_components = [dataio_0, scale_0, homo_lr_0]
    pipeline.deploy_component(components=deploy_components)

    predict_pipeline = PipeLine()
    # add data reader onto predict pipeline
    predict_pipeline.add_component(reader_0)
    # add selected components from train pipeline onto predict pipeline,
    # specifying the data source
    predict_pipeline.add_component(
        pipeline,
        data=Data(predict_input={pipeline.dataio_0.input.data: reader_0.output.data}))
    predict_pipeline.compile()
    predict_pipeline.predict(backend=backend, work_mode=work_mode)

    dsl_json = predict_pipeline.get_predict_dsl()
    conf_json = predict_pipeline.get_predict_conf()
    # BUG FIX: filenames were corrupted ("h**o-..."); also use context managers
    # so file handles are closed deterministically.
    with open('./homo-lr-normal-predict-dsl.json', 'w') as dsl_file:
        json.dump(dsl_json, dsl_file, indent=4)
    with open('./homo-lr-normal-predict-conf.json', 'w') as conf_file:
        json.dump(conf_json, conf_file, indent=4)

    # query component summary
    print(json.dumps(pipeline.get_component("homo_lr_0").get_summary(), indent=4, ensure_ascii=False))
    print(json.dumps(pipeline.get_component("evaluation_0").get_summary(), indent=4, ensure_ascii=False))
def main(config="../../config.yaml", namespace=""):
    """Fit hetero feature binning on a train set, replicate the model onto a
    validate set, then run prediction through the trained branch.

    :param config: path to a job config yaml, or an already-loaded config object
    :param namespace: suffix appended to the data namespace
    """
    # load config from file if a path string was given
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host

    guest_train_data = {"name": "vehicle_scale_hetero_guest", "namespace": f"experiment{namespace}"}
    guest_validate_data = {"name": "vehicle_scale_hetero_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "vehicle_scale_hetero_host", "namespace": f"experiment{namespace}"}
    host_validate_data = {"name": "vehicle_scale_hetero_host", "namespace": f"experiment{namespace}"}

    pipeline = PipeLine().set_initiator(role='guest', party_id=guest).set_roles(
        guest=guest, host=host)

    data_transform_0 = DataTransform(name="data_transform_0")
    data_transform_1 = DataTransform(name='data_transform_1')
    reader_0 = Reader(name="reader_0")
    reader_1 = Reader(name='reader_1')

    # train-set reader + transform
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)
    data_transform_0.get_party_instance(role='guest', party_id=guest).component_param(
        with_label=True, output_format="dense")
    data_transform_0.get_party_instance(role='host', party_id=host).component_param(
        with_label=False, output_format="dense")

    # validate-set reader + transform
    reader_1.get_party_instance(role='guest', party_id=guest).component_param(table=guest_validate_data)
    reader_1.get_party_instance(role='host', party_id=host).component_param(table=host_validate_data)
    data_transform_1.get_party_instance(role='guest', party_id=guest).component_param(
        with_label=True, output_format="dense")
    data_transform_1.get_party_instance(role='host', party_id=host).component_param(
        with_label=True, output_format="dense")

    intersection_0 = Intersection(name="intersection_0")
    intersection_1 = Intersection(name="intersection_1")

    # optimal quantile binning on the train branch
    param = {
        "method": "quantile",
        "optimal_binning_param": {
            "metric_method": "gini",
            "min_bin_pct": 0.05,
            "max_bin_pct": 0.8,
            "init_bucket_method": "quantile",
            "init_bin_nums": 100,
            "mixture": True
        },
        "compress_thres": 10000,
        "head_size": 10000,
        "error": 0.001,
        "bin_num": 10,
        "bin_indexes": -1,
        "bin_names": None,
        "category_indexes": [0, 1, 2],
        "category_names": None,
        "adjustment_factor": 0.5,
        "local_only": False,
        "transform_param": {
            "transform_cols": -1,
            "transform_names": None,
            "transform_type": "bin_num"
        }
    }
    hetero_feature_binning_0 = HeteroFeatureBinning(name="hetero_feature_binning_0", **param)
    hetero_feature_binning_1 = HeteroFeatureBinning(name='hetero_feature_binning_1')

    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(reader_1)
    # data_transform_1 replicates the fitted transform model from branch 0
    pipeline.add_component(
        data_transform_1,
        data=Data(data=reader_1.output.data),
        model=Model(data_transform_0.output.model))
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(intersection_1, data=Data(data=data_transform_1.output.data))
    pipeline.add_component(hetero_feature_binning_0, data=Data(data=intersection_0.output.data))
    # binning_1 applies the model fitted by binning_0 to the validate branch
    pipeline.add_component(
        hetero_feature_binning_1,
        data=Data(data=intersection_1.output.data),
        model=Model(hetero_feature_binning_0.output.model))

    pipeline.compile()
    pipeline.fit()

    # prediction: deploy the trained branch, feed it the validate reader
    pipeline.deploy_component([data_transform_0, intersection_0, hetero_feature_binning_0])
    predict_pipeline = PipeLine()
    predict_pipeline.add_component(reader_1)
    predict_pipeline.add_component(
        pipeline,
        data=Data(predict_input={pipeline.data_transform_0.input.data: reader_1.output.data}))
    predict_pipeline.predict()
def main(config="../../config.yaml", namespace=""):
    """Train a hetero LR pipeline with guest-side column expansion, then run
    prediction with the deployed components.

    :param config: path to a job config yaml, or an already-loaded config object
    :param namespace: suffix appended to the data namespace
    """
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]
    backend = config.backend
    work_mode = config.work_mode

    guest_train_data = {
        "name": "breast_hetero_guest",
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": "breast_hetero_host",
        "namespace": f"experiment{namespace}"
    }

    # initialize pipeline and declare the participating parties
    pipeline = PipeLine()
    pipeline.set_initiator(role="guest", party_id=guest)
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # reader, configured per party
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(
        role="guest", party_id=guest).algorithm_param(table=guest_train_data)
    reader_0.get_party_instance(
        role="host", party_id=host).algorithm_param(table=host_train_data)

    # guest appends four constant-valued columns; host passes data through
    column_expand_0 = ColumnExpand(name="column_expand_0")
    column_expand_0.get_party_instance(
        role="guest", party_id=guest).algorithm_param(
        need_run=True, method="manual",
        append_header=["x_0", "x_1", "x_2", "x_3"],
        fill_value=[0, 0.2, 0.5, 1])
    column_expand_0.get_party_instance(
        role="host", party_id=host).algorithm_param(need_run=False)

    # DataIO: labels on guest only
    dataio_0 = DataIO(name="dataio_0")
    dataio_0_guest_party_instance = dataio_0.get_party_instance(role="guest", party_id=guest)
    dataio_0_guest_party_instance.algorithm_param(with_label=True, output_format="dense")
    dataio_0.get_party_instance(
        role="host", party_id=host).algorithm_param(with_label=False)

    # RSA-based private set intersection
    intersection_0 = Intersection(name="intersection_0",
                                  intersect_method="rsa",
                                  sync_intersect_ids=True,
                                  only_output_key=False)

    param = {
        "penalty": "L2",
        "optimizer": "nesterov_momentum_sgd",
        "tol": 0.0001,
        "alpha": 0.01,
        "max_iter": 20,
        "early_stop": "weight_diff",
        "batch_size": -1,
        "learning_rate": 0.15,
        "init_param": {
            "init_method": "random_uniform"
        },
        "sqn_param": {
            "update_interval_L": 3,
            "memory_M": 5,
            "sample_size": 5000,
            "random_seed": None
        }
    }
    hetero_lr_0 = HeteroLR(name="hetero_lr_0", **param)

    # wire components in execution order
    pipeline.add_component(reader_0)
    pipeline.add_component(column_expand_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(dataio_0, data=Data(data=column_expand_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(hetero_lr_0, data=Data(train_data=intersection_0.output.data))

    pipeline.compile()
    pipeline.fit(backend=backend, work_mode=work_mode)
    # query component summary
    print(pipeline.get_component("hetero_lr_0").get_summary())

    # prediction: deploy the fitted chain and feed from the reader
    pipeline.deploy_component(
        [column_expand_0, dataio_0, intersection_0, hetero_lr_0])
    predict_pipeline = PipeLine()
    predict_pipeline.add_component(reader_0)
    predict_pipeline.add_component(
        pipeline,
        data=Data(predict_input={pipeline.column_expand_0.input.data: reader_0.output.data}))
    predict_pipeline.predict(backend=backend, work_mode=work_mode)
def main(config="../../config.yaml", namespace=""):
    """Train homo LR alongside a sklearn local baseline and evaluate both,
    then run prediction with the deployed components.

    :param config: path to a job config yaml, or an already-loaded config object
    :param namespace: suffix appended to the data namespace
    """
    # load config from file if a path string was given
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    guest_train_data = {
        "name": "breast_homo_guest",
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": "breast_homo_host",
        "namespace": f"experiment{namespace}"
    }

    pipeline = PipeLine().set_initiator(
        role='guest', party_id=guest).set_roles(guest=guest, host=host, arbiter=arbiter)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)

    # homo setting: a single transform config for all parties
    data_transform_0 = DataTransform(name="data_transform_0", with_label=True,
                                     output_format="dense", label_type="int",
                                     label_name="y")

    homo_lr_0 = HomoLR(name="homo_lr_0", penalty="L2", optimizer="sgd",
                       tol=0.0001, alpha=0.01, max_iter=30, batch_size=-1,
                       early_stop="weight_diff", learning_rate=0.15,
                       init_param={"init_method": "zeros"})

    # guest-only sklearn LogisticRegression as a local reference model
    local_baseline_0 = LocalBaseline(name="local_baseline_0",
                                     model_name="LogisticRegression",
                                     model_opts={
                                         "penalty": "l2",
                                         "tol": 0.0001,
                                         "C": 1.0,
                                         "fit_intercept": True,
                                         "solver": "saga",
                                         "max_iter": 2
                                     })
    local_baseline_0.get_party_instance(
        role='guest', party_id=guest).component_param(need_run=True)
    local_baseline_0.get_party_instance(
        role='host', party_id=host).component_param(need_run=False)

    # evaluation on guest only
    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary", pos_label=1)
    evaluation_0.get_party_instance(
        role='guest', party_id=guest).component_param(need_run=True)
    evaluation_0.get_party_instance(
        role='host', party_id=host).component_param(need_run=False)

    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(homo_lr_0, data=Data(train_data=data_transform_0.output.data))
    pipeline.add_component(local_baseline_0, data=Data(train_data=data_transform_0.output.data))
    # evaluate the federated model and the local baseline side by side
    pipeline.add_component(
        evaluation_0,
        data=Data(data=[homo_lr_0.output.data, local_baseline_0.output.data]))

    pipeline.compile()
    pipeline.fit()

    # prediction
    pipeline.deploy_component([data_transform_0, homo_lr_0, local_baseline_0])
    predict_pipeline = PipeLine()
    predict_pipeline.add_component(reader_0)
    predict_pipeline.add_component(
        pipeline,
        data=Data(predict_input={pipeline.data_transform_0.input.data: reader_0.output.data}))
    predict_pipeline.add_component(
        evaluation_0,
        data=Data(data=[homo_lr_0.output.data, local_baseline_0.output.data]))
    predict_pipeline.predict()
def main():
    """Train a hetero LR pipeline with a separate validate dataset and early
    stopping, print its summary, then run prediction on the eval data.

    Party ids, backend, and work mode are hard-coded for this demo.
    """
    # parties config
    guest = 9999
    host = 10000
    arbiter = 10000
    # 0 for eggroll, 1 for spark
    backend = Backend.EGGROLL
    # 0 for standalone, 1 for cluster
    work_mode = WorkMode.STANDALONE
    # use the work mode below for cluster deployment
    # work_mode = WorkMode.CLUSTER

    # input data name & namespace in the database
    guest_train_data = {
        "name": "breast_hetero_guest",
        "namespace": "experiment"
    }
    host_train_data = {"name": "breast_hetero_host", "namespace": "experiment"}
    guest_eval_data = {
        "name": "breast_hetero_guest",
        "namespace": "experiment"
    }
    host_eval_data = {"name": "breast_hetero_host", "namespace": "experiment"}

    # initialize pipeline and declare the participating parties
    pipeline = PipeLine()
    pipeline.set_initiator(role="guest", party_id=guest)
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # readers: reader_0 for train data, reader_1 for eval data
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(
        role="guest", party_id=guest).algorithm_param(table=guest_train_data)
    reader_0.get_party_instance(
        role="host", party_id=host).algorithm_param(table=host_train_data)
    reader_1 = Reader(name="reader_1")
    reader_1.get_party_instance(
        role="guest", party_id=guest).algorithm_param(table=guest_eval_data)
    reader_1.get_party_instance(
        role="host", party_id=host).algorithm_param(table=host_eval_data)

    # DataIO: labels on guest only
    dataio_0 = DataIO(name="dataio_0")
    dataio_1 = DataIO(name="dataio_1")
    dataio_0_guest_party_instance = dataio_0.get_party_instance(role="guest", party_id=guest)
    dataio_0_guest_party_instance.algorithm_param(with_label=True, output_format="dense")
    dataio_0.get_party_instance(
        role="host", party_id=host).algorithm_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")
    intersection_1 = Intersection(name="intersection_1")

    # hetero LR with early stopping against the validate set
    hetero_lr_0 = HeteroLR(name="hetero_lr_0", early_stop="weight_diff",
                           learning_rate=0.15, optimizer="rmsprop",
                           max_iter=10, early_stopping_rounds=2,
                           validation_freqs=1)

    # wire components in execution order
    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    # dataio_1 replicates the model fitted by dataio_0
    pipeline.add_component(dataio_1,
                           data=Data(data=reader_1.output.data),
                           model=Model(dataio_0.output.model))
    pipeline.add_component(intersection_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(intersection_1, data=Data(data=dataio_1.output.data))
    pipeline.add_component(hetero_lr_0,
                           data=Data(train_data=intersection_0.output.data,
                                     validate_data=intersection_1.output.data))

    pipeline.compile()
    pipeline.fit(backend=backend, work_mode=work_mode)

    # query component summary
    import json
    print(
        json.dumps(pipeline.get_component("hetero_lr_0").get_summary(), indent=4))

    # prediction: deploy the trained chain and feed it a fresh eval reader
    pipeline.deploy_component([dataio_0, intersection_0, hetero_lr_0])
    predict_pipeline = PipeLine()
    reader_2 = Reader(name="reader_2")
    reader_2.get_party_instance(
        role="guest", party_id=guest).algorithm_param(table=guest_eval_data)
    reader_2.get_party_instance(
        role="host", party_id=host).algorithm_param(table=host_eval_data)
    predict_pipeline.add_component(reader_2)
    predict_pipeline.add_component(
        pipeline,
        data=Data(predict_input={pipeline.dataio_0.input.data: reader_2.output.data}))
    predict_pipeline.predict(backend=backend, work_mode=work_mode)
def main(config="../../config.yaml", namespace=""): # obtain config if isinstance(config, str): config = load_job_config(config) parties = config.parties guest = parties.guest[0] host = parties.host[0] arbiter = parties.arbiter[0] guest_train_data = { "name": "breast_hetero_guest", "namespace": f"experiment{namespace}" } host_train_data = { "name": "breast_hetero_host", "namespace": f"experiment{namespace}" } pipeline = PipeLine().set_initiator( role='guest', party_id=guest).set_roles(guest=guest, host=host, arbiter=arbiter) reader_0 = Reader(name="reader_0") reader_0.get_party_instance( role='guest', party_id=guest).component_param(table=guest_train_data) reader_0.get_party_instance( role='host', party_id=host).component_param(table=host_train_data) data_transform_0 = DataTransform(name="data_transform_0") data_transform_0.get_party_instance( role='guest', party_id=guest).component_param(with_label=True, label_name="y", label_type="int", output_format="dense") data_transform_0.get_party_instance( role='host', party_id=host).component_param(with_label=False) intersection_0 = Intersection(name="intersection_0") sample_weight_0 = SampleWeight(name="sample_weight_0") sample_weight_0.get_party_instance( role='guest', party_id=guest).component_param(need_run=True, class_weight="balanced") sample_weight_0.get_party_instance( role='host', party_id=host).component_param(need_run=False) sample_weight_1 = SampleWeight(name="sample_weight_1") hetero_lr_0 = HeteroLR(name="hetero_lr_0", optimizer="nesterov_momentum_sgd", tol=0.001, alpha=0.01, max_iter=20, early_stop="weight_diff", batch_size=-1, learning_rate=0.15, init_param={"init_method": "zeros"}) evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary", pos_label=1) # evaluation_0.get_party_instance(role='host', party_id=host).component_param(need_run=False) pipeline.add_component(reader_0) pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data)) pipeline.add_component(intersection_0, 
data=Data(data=data_transform_0.output.data)) pipeline.add_component(sample_weight_0, data=Data(data=intersection_0.output.data)) pipeline.add_component(sample_weight_1, data=Data(data=intersection_0.output.data), model=Model(model=sample_weight_0.output.model)) pipeline.add_component(hetero_lr_0, data=Data(train_data=sample_weight_1.output.data)) pipeline.add_component(evaluation_0, data=Data(data=hetero_lr_0.output.data)) pipeline.compile() pipeline.fit() # predict # deploy required components pipeline.deploy_component( [data_transform_0, intersection_0, sample_weight_0, hetero_lr_0]) predict_pipeline = PipeLine() # add data reader onto predict pipeline predict_pipeline.add_component(reader_0) # add selected components from train pipeline onto predict pipeline # specify data source predict_pipeline.add_component( pipeline, data=Data(predict_input={ pipeline.data_transform_0.input.data: reader_0.output.data })) # run predict model predict_pipeline.predict()
def main(config="../../config.yaml", namespace=""): # obtain config if isinstance(config, str): config = load_job_config(config) parties = config.parties guest = parties.guest[0] host = parties.host[0] guest_train_data = { "name": "motor_hetero_guest", "namespace": f"experiment{namespace}" } host_train_data = { "name": "motor_hetero_host", "namespace": f"experiment{namespace}" } pipeline = PipeLine().set_initiator(role='guest', party_id=guest).set_roles(guest=guest, host=host) reader_0 = Reader(name="reader_0") reader_0.get_party_instance( role='guest', party_id=guest).component_param(table=guest_train_data) reader_0.get_party_instance( role='host', party_id=host).component_param(table=host_train_data) data_transform_0 = DataTransform(name="data_transform_0") data_transform_0.get_party_instance( role='guest', party_id=guest).component_param(with_label=True, label_name="motor_speed", label_type="float", output_format="dense") data_transform_0.get_party_instance( role='host', party_id=host).component_param(with_label=False) intersection_0 = Intersection(name="intersection_0") sample_weight_0 = SampleWeight(name="sample_weight_0") sample_weight_0.get_party_instance( role='guest', party_id=guest).component_param(need_run=True, sample_weight_name="pm") sample_weight_0.get_party_instance( role='host', party_id=host).component_param(need_run=False) hetero_linr_0 = HeteroSSHELinR(name="hetero_linr_0", penalty="L2", optimizer="rmsprop", tol=0.001, alpha=0.01, max_iter=20, early_stop="weight_diff", batch_size=-1, learning_rate=0.15, decay=0.0, decay_sqrt=False, init_param={"init_method": "zeros"}, reveal_every_iter=True, reveal_strategy="respectively") evaluation_0 = Evaluation(name="evaluation_0", eval_type="regression", pos_label=1) # evaluation_0.get_party_instance(role='host', party_id=host).component_param(need_run=False) pipeline.add_component(reader_0) pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data)) pipeline.add_component(intersection_0, 
data=Data(data=data_transform_0.output.data)) pipeline.add_component(sample_weight_0, data=Data(data=intersection_0.output.data)) pipeline.add_component(hetero_linr_0, data=Data(train_data=sample_weight_0.output.data)) pipeline.add_component(evaluation_0, data=Data(data=hetero_linr_0.output.data)) pipeline.compile() pipeline.fit() # predict # deploy required components pipeline.deploy_component( [data_transform_0, intersection_0, hetero_linr_0]) predict_pipeline = PipeLine() # add data reader onto predict pipeline predict_pipeline.add_component(reader_0) # add selected components from train pipeline onto predict pipeline # specify data source predict_pipeline.add_component( pipeline, data=Data(predict_input={ pipeline.data_transform_0.input.data: reader_0.output.data })) # run predict model predict_pipeline.predict()
def main(config="../../config.yaml", namespace=""): # obtain config if isinstance(config, str): config = load_job_config(config) parties = config.parties guest = parties.guest[0] host = parties.host[0] arbiter = parties.arbiter[0] backend = config.backend work_mode = config.work_mode guest_train_data = { "name": "vehicle_scale_hetero_guest", "namespace": f"experiment{namespace}" } host_train_data = { "name": "vehicle_scale_hetero_host", "namespace": f"experiment{namespace}" } pipeline = PipeLine().set_initiator( role='guest', party_id=guest).set_roles(guest=guest, host=host, arbiter=arbiter) reader_0 = Reader(name="reader_0") reader_0.get_party_instance( role='guest', party_id=guest).component_param(table=guest_train_data) reader_0.get_party_instance( role='host', party_id=host).component_param(table=host_train_data) dataio_0 = DataIO(name="dataio_0") dataio_0.get_party_instance(role='guest', party_id=guest).component_param( with_label=True, output_format="dense", label_type="int", label_name="y") dataio_0.get_party_instance( role='host', party_id=host).component_param(with_label=False) intersection_0 = Intersection(name="intersection_0", intersect_method="rsa", sync_intersect_ids=True, only_output_key=False) hetero_lr_0 = HeteroLR(name="hetero_lr_0", penalty="L2", optimizer="nesterov_momentum_sgd", tol=0.0001, alpha=0.0001, max_iter=30, batch_size=-1, early_stop="diff", learning_rate=0.15, init_param={"init_method": "zeros"}) local_baseline_0 = LocalBaseline(name="local_baseline_0", model_name="LogisticRegression", model_opts={ "penalty": "l2", "tol": 0.0001, "C": 1.0, "fit_intercept": True, "solver": "lbfgs", "max_iter": 5, "multi_class": "ovr" }) local_baseline_0.get_party_instance( role='guest', party_id=guest).component_param(need_run=True) local_baseline_0.get_party_instance( role='host', party_id=host).component_param(need_run=False) evaluation_0 = Evaluation(name="evaluation_0", eval_type="multi", pos_label=1) evaluation_0.get_party_instance( role='guest', 
party_id=guest).component_param(need_run=True) evaluation_0.get_party_instance( role='host', party_id=host).component_param(need_run=False) pipeline.add_component(reader_0) pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data)) pipeline.add_component(intersection_0, data=Data(data=dataio_0.output.data)) pipeline.add_component(hetero_lr_0, data=Data(train_data=intersection_0.output.data)) pipeline.add_component(local_baseline_0, data=Data(train_data=intersection_0.output.data)) pipeline.add_component( evaluation_0, data=Data( data=[hetero_lr_0.output.data, local_baseline_0.output.data])) pipeline.compile() job_parameters = JobParameters(backend=backend, work_mode=work_mode) pipeline.fit(job_parameters) # predict pipeline.deploy_component( [dataio_0, intersection_0, hetero_lr_0, local_baseline_0]) predict_pipeline = PipeLine() predict_pipeline.add_component(reader_0) predict_pipeline.add_component( pipeline, data=Data( predict_input={pipeline.dataio_0.input.data: reader_0.output.data })) predict_pipeline.add_component( evaluation_0, data=Data( data=[hetero_lr_0.output.data, local_baseline_0.output.data])) predict_pipeline.predict(job_parameters)
def main(config="../../config.yaml", namespace=""): # obtain config if isinstance(config, str): config = load_job_config(config) parties = config.parties guest = parties.guest[0] host = parties.host[0] arbiter = parties.arbiter[0] guest_train_data = { "name": "breast_homo_guest", "namespace": f"experiment{namespace}" } guest_validate_data = { "name": "breast_homo_test", "namespace": f"experiment{namespace}" } host_train_data = { "name": "breast_homo_host", "namespace": f"experiment{namespace}" } host_validate_data = { "name": "breast_homo_test", "namespace": f"experiment{namespace}" } pipeline = PipeLine().set_initiator( role='guest', party_id=guest).set_roles(guest=guest, host=host, arbiter=arbiter) data_transform_0, data_transform_1 = DataTransform( name="data_transform_0"), DataTransform(name='data_transform_1') reader_0, reader_1 = Reader(name="reader_0"), Reader(name='reader_1') reader_0.get_party_instance( role='guest', party_id=guest).component_param(table=guest_train_data) reader_0.get_party_instance( role='host', party_id=host).component_param(table=host_train_data) data_transform_0.get_party_instance( role='guest', party_id=guest).component_param(with_label=True, output_format="dense") data_transform_0.get_party_instance( role='host', party_id=host).component_param(with_label=True, output_format="dense") reader_1.get_party_instance( role='guest', party_id=guest).component_param(table=guest_validate_data) reader_1.get_party_instance( role='host', party_id=host).component_param(table=host_validate_data) data_transform_1.get_party_instance( role='guest', party_id=guest).component_param(with_label=True, output_format="dense") data_transform_1.get_party_instance( role='host', party_id=host).component_param(with_label=True, output_format="dense") homo_secureboost_0 = HomoSecureBoost( name="homo_secureboost_0", num_trees=3, task_type='classification', objective_param={"objective": "cross_entropy"}, tree_param={"max_depth": 3}, validation_freqs=1) evaluation_0 = 
Evaluation(name='evaluation_0', eval_type='binary') pipeline.add_component(reader_0) pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data)) pipeline.add_component(reader_1) pipeline.add_component(data_transform_1, data=Data(data=reader_1.output.data), model=Model(data_transform_0.output.model)) pipeline.add_component(homo_secureboost_0, data=Data( train_data=data_transform_0.output.data, validate_data=data_transform_1.output.data)) pipeline.add_component(evaluation_0, data=Data(homo_secureboost_0.output.data)) pipeline.compile() pipeline.fit() # predict # deploy required components pipeline.deploy_component([data_transform_0, homo_secureboost_0]) predict_pipeline = PipeLine() # add data reader onto predict pipeline predict_pipeline.add_component(reader_1) # add selected components from train pipeline onto predict pipeline # specify data source predict_pipeline.add_component( pipeline, data=Data(predict_input={ pipeline.data_transform_0.input.data: reader_1.output.data })) # run predict model predict_pipeline.predict()
def main(config="../../config.yaml", namespace=""): if isinstance(config, str): config = load_job_config(config) parties = config.parties guest = parties.guest[0] hosts = parties.host[0] guest_train_data = { "name": "breast_hetero_guest", "namespace": f"experiment{namespace}" } host_train_data = { "name": "breast_hetero_host", "namespace": f"experiment{namespace}" } # guest_train_data = {"name": "default_credit_hetero_guest", "namespace": f"experiment{namespace}"} # host_train_data = {"name": "default_credit_hetero_host", "namespace": f"experiment{namespace}"} # initialize pipeline pipeline = PipeLine() # set job initiator pipeline.set_initiator(role='guest', party_id=guest) # set participants information pipeline.set_roles(guest=guest, host=hosts) # define Reader components to read in data reader_0 = Reader(name="reader_0") # configure Reader for guest reader_0.get_party_instance( role='guest', party_id=guest).component_param(table=guest_train_data) # configure Reader for host reader_0.get_party_instance( role='host', party_id=hosts).component_param(table=host_train_data) data_transform_0 = DataTransform(name="data_transform_0", output_format='dense') # get DataTransform party instance of guest data_transform_0_guest_party_instance = data_transform_0.get_party_instance( role='guest', party_id=guest) # configure DataTransform for guest data_transform_0_guest_party_instance.component_param(with_label=True) # get and configure DataTransform party instance of host data_transform_0.get_party_instance( role='host', party_id=hosts).component_param(with_label=False) # define Intersection components intersection_0 = Intersection(name="intersection_0") pipeline.add_component(reader_0) pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data)) pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data)) lr_param = { "name": "hetero_sshe_lr_0", "penalty": "L2", "tol": 0.0001, "alpha": 10, "max_iter": 30, "early_stop": "weight_diff", 
"batch_size": -1, "learning_rate": 0.3, "decay": 0.5, "init_param": { "init_method": "const", "init_const": 200, "fit_intercept": False }, "encrypt_param": { "key_length": 1024 } } hetero_sshe_lr_0 = HeteroSSHELR(**lr_param) pipeline.add_component(hetero_sshe_lr_0, data=Data(train_data=intersection_0.output.data)) evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary") pipeline.add_component(evaluation_0, data=Data(data=hetero_sshe_lr_0.output.data)) pipeline.compile() # fit model pipeline.fit() # query component summary prettify(pipeline.get_component("hetero_sshe_lr_0").get_summary()) prettify(pipeline.get_component("evaluation_0").get_summary()) pipeline.deploy_component( [data_transform_0, intersection_0, hetero_sshe_lr_0]) predict_pipeline = PipeLine() # add data reader onto predict pipeline predict_pipeline.add_component(reader_0) # add selected components from train pipeline onto predict pipeline # specify data source predict_pipeline.add_component( pipeline, data=Data(predict_input={ pipeline.data_transform_0.input.data: reader_0.output.data })) # run predict model predict_pipeline.predict() return pipeline
def main(config="../../config.yaml", namespace=""): # obtain config if isinstance(config, str): config = load_job_config(config) parties = config.parties guest = parties.guest[0] hosts = parties.host arbiter = parties.arbiter[0] backend = config.backend work_mode = config.work_mode guest_train_data = { "name": "motor_hetero_guest", "namespace": f"experiment{namespace}" } host_train_data = [{ "name": "motor_hetero_host", "namespace": f"experiment{namespace}" }, { "name": "motor_hetero_host", "namespace": f"experiment{namespace}" }] pipeline = PipeLine().set_initiator( role='guest', party_id=guest).set_roles(guest=guest, host=hosts, arbiter=arbiter) reader_0 = Reader(name="reader_0") reader_0.get_party_instance( role='guest', party_id=guest).algorithm_param(table=guest_train_data) reader_0.get_party_instance( role='host', party_id=hosts[0]).algorithm_param(table=host_train_data[0]) reader_0.get_party_instance( role='host', party_id=hosts[1]).algorithm_param(table=host_train_data[1]) dataio_0 = DataIO(name="dataio_0") dataio_0.get_party_instance(role='guest', party_id=guest).algorithm_param( with_label=True, label_name="motor_speed", label_type="float", output_format="dense") dataio_0.get_party_instance( role='host', party_id=hosts).algorithm_param(with_label=False) intersection_0 = Intersection(name="intersection_0") hetero_linr_0 = HeteroLinR( name="hetero_linr_0", penalty="L2", optimizer="sgd", tol=0.001, alpha=0.01, max_iter=20, early_stop="weight_diff", batch_size=-1, learning_rate=0.15, decay=0.0, decay_sqrt=False, init_param={"init_method": "zeros"}, encrypted_mode_calculator_param={"mode": "fast"}) evaluation_0 = Evaluation(name="evaluation_0", eval_type="regression", pos_label=1) # evaluation_0.get_party_instance(role='host', party_id=hosts[0]).algorithm_param(need_run=False) # evaluation_0.get_party_instance(role='host', party_id=hosts[1]).algorithm_param(need_run=False) pipeline.add_component(reader_0) pipeline.add_component(dataio_0, 
data=Data(data=reader_0.output.data)) pipeline.add_component(intersection_0, data=Data(data=dataio_0.output.data)) pipeline.add_component(hetero_linr_0, data=Data(train_data=intersection_0.output.data)) pipeline.add_component(evaluation_0, data=Data(data=hetero_linr_0.output.data)) pipeline.compile() pipeline.fit(backend=backend, work_mode=work_mode) # predict # deploy required components pipeline.deploy_component([dataio_0, intersection_0, hetero_linr_0]) predict_pipeline = PipeLine() # add data reader onto predict pipeline predict_pipeline.add_component(reader_0) # add selected components from train pipeline onto predict pipeline # specify data source predict_pipeline.add_component( pipeline, data=Data( predict_input={pipeline.dataio_0.input.data: reader_0.output.data })) # run predict model predict_pipeline.predict(backend=backend, work_mode=work_mode)
def main(config="../../config.yaml", namespace=""): # obtain config if isinstance(config, str): config = load_job_config(config) parties = config.parties guest = parties.guest[0] host = parties.host[0] backend = config.backend work_mode = config.work_mode # data sets guest_train_data = { "name": "breast_hetero_guest", "namespace": f"experiment{namespace}" } host_train_data = { "name": "breast_hetero_host", "namespace": f"experiment{namespace}" } guest_validate_data = { "name": "breast_hetero_guest", "namespace": f"experiment{namespace}" } host_validate_data = { "name": "breast_hetero_host", "namespace": f"experiment{namespace}" } # init pipeline pipeline = PipeLine().set_initiator(role="guest", party_id=guest).set_roles( guest=guest, host=host, ) # set data reader and data-io reader_0, reader_1 = Reader(name="reader_0"), Reader(name="reader_1") reader_0.get_party_instance( role="guest", party_id=guest).component_param(table=guest_train_data) reader_0.get_party_instance( role="host", party_id=host).component_param(table=host_train_data) reader_1.get_party_instance( role="guest", party_id=guest).component_param(table=guest_validate_data) reader_1.get_party_instance( role="host", party_id=host).component_param(table=host_validate_data) dataio_0, dataio_1 = DataIO(name="dataio_0"), DataIO(name="dataio_1") dataio_0.get_party_instance(role="guest", party_id=guest).component_param( with_label=True, output_format="dense") dataio_0.get_party_instance( role="host", party_id=host).component_param(with_label=False) dataio_1.get_party_instance(role="guest", party_id=guest).component_param( with_label=True, output_format="dense") dataio_1.get_party_instance( role="host", party_id=host).component_param(with_label=False) # data intersect component intersect_0 = Intersection(name="intersection_0") intersect_1 = Intersection(name="intersection_1") # secure boost component hetero_fast_secure_boost_0 = HeteroFastSecureBoost( name="hetero_fast_secure_boost_0", num_trees=4, 
tree_num_per_party=1, task_type='classification', objective_param={"objective": "cross_entropy"}, encrypt_param={"method": "iterativeAffine"}, tree_param={"max_depth": 3}, validation_freqs=1, work_mode='mix') # evaluation component evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary") pipeline.add_component(reader_0) pipeline.add_component(reader_1) pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data)) pipeline.add_component(dataio_1, data=Data(data=reader_1.output.data), model=Model(dataio_0.output.model)) pipeline.add_component(intersect_0, data=Data(data=dataio_0.output.data)) pipeline.add_component(intersect_1, data=Data(data=dataio_1.output.data)) pipeline.add_component(hetero_fast_secure_boost_0, data=Data(train_data=intersect_0.output.data, validate_data=intersect_1.output.data)) pipeline.add_component( evaluation_0, data=Data(data=hetero_fast_secure_boost_0.output.data)) pipeline.compile() job_parameters = JobParameters(backend=backend, work_mode=work_mode) pipeline.fit(job_parameters) print("fitting hetero fast secureboost done, result:") print(pipeline.get_component("hetero_fast_secure_boost_0").get_summary()) # predict # deploy required components pipeline.deploy_component( [dataio_0, intersect_0, hetero_fast_secure_boost_0]) predict_pipeline = PipeLine() # add data reader onto predict pipeline predict_pipeline.add_component(reader_0) # add selected components from train pipeline onto predict pipeline # specify data source predict_pipeline.add_component( pipeline, data=Data( predict_input={pipeline.dataio_0.input.data: reader_0.output.data })) # run predict model predict_pipeline.predict(job_parameters)
def main(): # parties config guest = 9999 host = 10000 arbiter = 10000 # specify input data name & namespace in database guest_train_data = { "name": "breast_hetero_guest", "namespace": "experiment" } host_train_data = {"name": "breast_hetero_host", "namespace": "experiment"} guest_eval_data = { "name": "breast_hetero_guest", "namespace": "experiment" } host_eval_data = {"name": "breast_hetero_host", "namespace": "experiment"} # initialize pipeline pipeline = PipeLine() # set job initiator pipeline.set_initiator(role="guest", party_id=guest) # set participants information pipeline.set_roles(guest=guest, host=host, arbiter=arbiter) # define Reader components to read in data reader_0 = Reader(name="reader_0") # configure Reader for guest reader_0.get_party_instance( role="guest", party_id=guest).component_param(table=guest_train_data) # configure Reader for host reader_0.get_party_instance( role="host", party_id=host).component_param(table=host_train_data) # define DataTransform component data_transform_0 = DataTransform(name="data_transform_0") # get DataTransform party instance of guest data_transform_0_guest_party_instance = data_transform_0.get_party_instance( role="guest", party_id=guest) # configure DataTransform for guest data_transform_0_guest_party_instance.component_param( with_label=True, output_format="dense") # get and configure DataTransform party instance of host data_transform_0.get_party_instance( role="host", party_id=host).component_param(with_label=False) # define Intersection components intersection_0 = Intersection(name="intersection_0") # define HeteroLR component hetero_lr_0 = HeteroLR(name="hetero_lr_0", early_stop="diff", learning_rate=0.15, optimizer="rmsprop", max_iter=10) # add components to pipeline, in order of task execution pipeline.add_component(reader_0) pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data)) # set data input sources of intersection components pipeline.add_component(intersection_0, 
data=Data(data=data_transform_0.output.data)) # set train data of hetero_lr_0 component pipeline.add_component(hetero_lr_0, data=Data(train_data=intersection_0.output.data)) # compile pipeline once finished adding modules, this step will form conf and dsl files for running job pipeline.compile() # fit model pipeline.fit() # query component summary import json print( json.dumps(pipeline.get_component("hetero_lr_0").get_summary(), indent=4)) # predict # deploy required components pipeline.deploy_component([data_transform_0, intersection_0, hetero_lr_0]) # initiate predict pipeline predict_pipeline = PipeLine() # define new data reader reader_1 = Reader(name="reader_1") reader_1.get_party_instance( role="guest", party_id=guest).component_param(table=guest_eval_data) reader_1.get_party_instance( role="host", party_id=host).component_param(table=host_eval_data) # define evaluation component evaluation_0 = Evaluation(name="evaluation_0") evaluation_0.get_party_instance( role="guest", party_id=guest).component_param(need_run=True, eval_type="binary") evaluation_0.get_party_instance( role="host", party_id=host).component_param(need_run=False) # add data reader onto predict pipeline predict_pipeline.add_component(reader_1) # add selected components from train pipeline onto predict pipeline # specify data source predict_pipeline.add_component( pipeline, data=Data(predict_input={ pipeline.data_transform_0.input.data: reader_1.output.data })) # add evaluation component to predict pipeline predict_pipeline.add_component( evaluation_0, data=Data(data=pipeline.hetero_lr_0.output.data)) # run predict model predict_pipeline.predict()
def main(config="../../config.yaml", namespace=""): # obtain config if isinstance(config, str): config = load_job_config(config) parties = config.parties guest = parties.guest[0] host = parties.host[0] # data sets guest_train_data = { "name": "breast_hetero_guest", "namespace": f"experiment{namespace}" } host_train_data = { "name": "breast_hetero_host", "namespace": f"experiment{namespace}" } guest_validate_data = { "name": "breast_hetero_guest", "namespace": f"experiment{namespace}" } host_validate_data = { "name": "breast_hetero_host", "namespace": f"experiment{namespace}" } # init pipeline pipeline = PipeLine().set_initiator(role="guest", party_id=guest).set_roles( guest=guest, host=host, ) # set data reader and data-io reader_0, reader_1 = Reader(name="reader_0"), Reader(name="reader_1") reader_0.get_party_instance( role="guest", party_id=guest).component_param(table=guest_train_data) reader_0.get_party_instance( role="host", party_id=host).component_param(table=host_train_data) reader_1.get_party_instance( role="guest", party_id=guest).component_param(table=guest_validate_data) reader_1.get_party_instance( role="host", party_id=host).component_param(table=host_validate_data) data_transform_0, data_transform_1 = DataTransform( name="data_transform_0"), DataTransform(name="data_transform_1") data_transform_0.get_party_instance( role="guest", party_id=guest).component_param(with_label=True, output_format="dense") data_transform_0.get_party_instance( role="host", party_id=host).component_param(with_label=False) data_transform_1.get_party_instance( role="guest", party_id=guest).component_param(with_label=True, output_format="dense") data_transform_1.get_party_instance( role="host", party_id=host).component_param(with_label=False) # data intersect component intersect_0 = Intersection(name="intersection_0") intersect_1 = Intersection(name="intersection_1") # secure boost component hetero_secure_boost_0 = HeteroSecureBoost( name="hetero_secure_boost_0", num_trees=3, 
task_type="classification", objective_param={"objective": "cross_entropy"}, encrypt_param={"method": "Paillier"}, tree_param={"max_depth": 3}, validation_freqs=1, EINI_inference=True, EINI_random_mask=True) # evaluation component evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary") pipeline.add_component(reader_0) pipeline.add_component(reader_1) pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data)) pipeline.add_component(data_transform_1, data=Data(data=reader_1.output.data), model=Model(data_transform_0.output.model)) pipeline.add_component(intersect_0, data=Data(data=data_transform_0.output.data)) pipeline.add_component(intersect_1, data=Data(data=data_transform_1.output.data)) pipeline.add_component(hetero_secure_boost_0, data=Data(train_data=intersect_0.output.data, validate_data=intersect_1.output.data)) pipeline.add_component(evaluation_0, data=Data(data=hetero_secure_boost_0.output.data)) pipeline.compile() pipeline.fit() print("fitting hetero secureboost done, result:") print(pipeline.get_component("hetero_secure_boost_0").get_summary()) print('start to predict') # predict # deploy required components pipeline.deploy_component( [data_transform_0, intersect_0, hetero_secure_boost_0, evaluation_0]) predict_pipeline = PipeLine() # add data reader onto predict pipeline predict_pipeline.add_component(reader_0) # add selected components from train pipeline onto predict pipeline # specify data source predict_pipeline.add_component( pipeline, data=Data(predict_input={ pipeline.data_transform_0.input.data: reader_0.output.data })) # run predict model predict_pipeline.predict() predict_result = predict_pipeline.get_component( "hetero_secure_boost_0").get_output_data() print("Showing 10 data of predict result") for ret in predict_result["data"][:10]: print(ret)