def main(config="../../config.yaml", namespace=""):
    """Fit a HomoSecureBoost regression pipeline with a validation branch.

    The pipeline reads one train and one validate table per party, runs each
    through a DataTransform component, trains ``homo_secureboost_0`` and
    attaches a regression Evaluation component to its output.

    Args:
        config: Path passed to ``load_job_config``, or an already loaded
            config object exposing ``parties``.
        namespace: Suffix appended to ``"experiment"`` to build the table
            namespace.
    """
    if isinstance(config, str):
        config = load_job_config(config)

    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    table_namespace = f"experiment{namespace}"

    def table(name):
        # A fresh dict per call, so no two readers share one table spec.
        return {"name": name, "namespace": table_namespace}

    guest_train_data = table("student_homo_guest")
    guest_validate_data = table("student_homo_test")
    host_train_data = table("student_homo_host")
    host_validate_data = table("student_homo_test")

    pipeline = PipeLine()
    pipeline = pipeline.set_initiator(role='guest', party_id=guest)
    pipeline = pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    data_transform_0 = DataTransform(name="data_transform_0")
    data_transform_1 = DataTransform(name='data_transform_1')
    reader_0 = Reader(name="reader_0")
    reader_1 = Reader(name='reader_1')

    transform_options = {
        "with_label": True,
        "output_format": "dense",
        "label_type": "float",
    }
    # (reader, transform, guest table, host table): train branch first,
    # validate branch second.
    branches = (
        (reader_0, data_transform_0, guest_train_data, host_train_data),
        (reader_1, data_transform_1, guest_validate_data, host_validate_data),
    )
    for reader, transform, guest_table, host_table in branches:
        reader.get_party_instance(role='guest', party_id=guest).component_param(table=guest_table)
        reader.get_party_instance(role='host', party_id=host).component_param(table=host_table)
        transform.get_party_instance(role='guest', party_id=guest).component_param(**transform_options)
        transform.get_party_instance(role='host', party_id=host).component_param(**transform_options)

    homo_secureboost_0 = HomoSecureBoost(
        name="homo_secureboost_0",
        num_trees=3,
        task_type='regression',
        objective_param={"objective": "lse"},
        tree_param={"max_depth": 3},
        validation_freqs=1,
    )
    evaluation_0 = Evaluation(name='evaluation_0', eval_type='regression')

    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(reader_1)
    # The validate-side transform reuses the model produced by the train-side one.
    pipeline.add_component(
        data_transform_1,
        data=Data(data=reader_1.output.data),
        model=Model(data_transform_0.output.model),
    )
    boost_inputs = Data(
        train_data=data_transform_0.output.data,
        validate_data=data_transform_1.output.data,
    )
    pipeline.add_component(homo_secureboost_0, data=boost_inputs)
    pipeline.add_component(evaluation_0, data=Data(homo_secureboost_0.output.data))

    pipeline.compile()
    pipeline.fit()
def main(config="../../config.yaml", namespace=""):
    """Fit a HeteroFTL pipeline on the ``nus_wide`` guest/host tables.

    Builds reader -> data transform -> HeteroFTL -> binary evaluation, then
    compiles and fits the pipeline.

    Args:
        config: Path passed to ``load_job_config``, or an already loaded
            config object exposing ``parties``.
        namespace: Suffix appended to ``"experiment"`` to build the table
            namespace.
    """
    if isinstance(config, str):
        config = load_job_config(config)

    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]

    table_namespace = f"experiment{namespace}"
    guest_train_data = {"name": "nus_wide_guest", "namespace": table_namespace}
    host_train_data = {"name": "nus_wide_host", "namespace": table_namespace}

    pipeline = PipeLine()
    pipeline = pipeline.set_initiator(role='guest', party_id=guest)
    pipeline = pipeline.set_roles(guest=guest, host=host)

    # Reader: one table per party.
    reader_0 = Reader(name="reader_0")
    guest_reader = reader_0.get_party_instance(role='guest', party_id=guest)
    guest_reader.component_param(table=guest_train_data)
    host_reader = reader_0.get_party_instance(role='host', party_id=host)
    host_reader.component_param(table=host_train_data)

    # Data transform: only the guest side is configured with labels.
    data_transform_0 = DataTransform(name="data_transform_0")
    guest_transform = data_transform_0.get_party_instance(role='guest', party_id=guest)
    guest_transform.component_param(with_label=True, output_format="dense")
    host_transform = data_transform_0.get_party_instance(role='host', party_id=host)
    host_transform.component_param(with_label=False)

    hetero_ftl_0 = HeteroFTL(
        name='hetero_ftl_0',
        epochs=10,
        alpha=1,
        batch_size=-1,
        mode='plain',
        communication_efficient=True,
        local_round=5,
    )
    kernel_init = initializers.RandomNormal(stddev=1.0)
    bias_init = initializers.Zeros()
    dense_layer = Dense(
        units=32,
        activation='sigmoid',
        kernel_initializer=kernel_init,
        bias_initializer=bias_init,
    )
    hetero_ftl_0.add_nn_layer(dense_layer)
    hetero_ftl_0.compile(optimizer=optimizers.Adam(lr=0.01))

    evaluation_0 = Evaluation(name='evaluation_0', eval_type="binary")

    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(hetero_ftl_0, data=Data(train_data=data_transform_0.output.data))
    pipeline.add_component(evaluation_0, data=Data(data=hetero_ftl_0.output.data))

    pipeline.compile()
    pipeline.fit()
def main(config="../../config.yaml", namespace=""):
    """Fit a multi-class HomoSecureBoost pipeline with a validation branch.

    Reads the ``vehicle_scale_homo`` train and test tables, passes them
    through DataIO, trains ``homo_secureboost_0`` with a cross-entropy
    objective and attaches a ``multi`` Evaluation component.

    Args:
        config: Path passed to ``load_job_config``, or an already loaded
            config object exposing ``parties``, ``backend`` and ``work_mode``.
        namespace: Suffix appended to ``"experiment"`` to build the table
            namespace.
    """
    if isinstance(config, str):
        config = load_job_config(config)

    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]
    backend = config.backend
    work_mode = config.work_mode

    table_namespace = f"experiment{namespace}"

    def table(name):
        # A fresh dict per call, so no two readers share one table spec.
        return {"name": name, "namespace": table_namespace}

    guest_train_data = table("vehicle_scale_homo_guest")
    guest_validate_data = table("vehicle_scale_homo_test")
    host_train_data = table("vehicle_scale_homo_host")
    host_validate_data = table("vehicle_scale_homo_test")

    pipeline = PipeLine()
    pipeline = pipeline.set_initiator(role='guest', party_id=guest)
    pipeline = pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    dataio_0 = DataIO(name="dataio_0")
    dataio_1 = DataIO(name='dataio_1')
    reader_0 = Reader(name="reader_0")
    reader_1 = Reader(name='reader_1')

    dataio_options = {"with_label": True, "output_format": "dense"}
    # (reader, dataio, guest table, host table): train branch first,
    # validate branch second.
    branches = (
        (reader_0, dataio_0, guest_train_data, host_train_data),
        (reader_1, dataio_1, guest_validate_data, host_validate_data),
    )
    for reader, dataio, guest_table, host_table in branches:
        reader.get_party_instance(role='guest', party_id=guest).algorithm_param(table=guest_table)
        reader.get_party_instance(role='host', party_id=host).algorithm_param(table=host_table)
        dataio.get_party_instance(role='guest', party_id=guest).algorithm_param(**dataio_options)
        dataio.get_party_instance(role='host', party_id=host).algorithm_param(**dataio_options)

    homo_secureboost_0 = HomoSecureBoost(
        name="homo_secureboost_0",
        num_trees=3,
        task_type='classification',
        objective_param={"objective": "cross_entropy"},
        tree_param={"max_depth": 3},
        validation_freqs=1,
    )
    evaluation_0 = Evaluation(name='evaluation_0', eval_type='multi')

    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(reader_1)
    # The validate-side DataIO reuses the model produced by the train-side one.
    pipeline.add_component(
        dataio_1,
        data=Data(data=reader_1.output.data),
        model=Model(dataio_0.output.model),
    )
    boost_inputs = Data(
        train_data=dataio_0.output.data,
        validate_data=dataio_1.output.data,
    )
    pipeline.add_component(homo_secureboost_0, data=boost_inputs)
    pipeline.add_component(evaluation_0, data=Data(homo_secureboost_0.output.data))

    pipeline.compile()
    pipeline.fit(backend=backend, work_mode=work_mode)
def main(config="../../config.yaml", namespace=""):
    """Fit a HeteroFTL pipeline, then run prediction with the deployed parts.

    Training wires reader -> DataIO -> HeteroFTL -> binary evaluation. After
    fitting, ``dataio_0`` and ``hetero_ftl_0`` are deployed and a second
    pipeline feeds ``reader_0`` output into them for prediction.

    Args:
        config: Path passed to ``load_job_config``, or an already loaded
            config object exposing ``parties``, ``backend`` and ``work_mode``.
        namespace: Suffix appended to ``"experiment"`` to build the table
            namespace.
    """
    if isinstance(config, str):
        config = load_job_config(config)

    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    backend = config.backend
    work_mode = config.work_mode

    table_namespace = f"experiment{namespace}"
    guest_train_data = {"name": "nus_wide_guest", "namespace": table_namespace}
    host_train_data = {"name": "nus_wide_host", "namespace": table_namespace}

    pipeline = PipeLine()
    pipeline = pipeline.set_initiator(role='guest', party_id=guest)
    pipeline = pipeline.set_roles(guest=guest, host=host)

    # Reader: one table per party.
    reader_0 = Reader(name="reader_0")
    guest_reader = reader_0.get_party_instance(role='guest', party_id=guest)
    guest_reader.algorithm_param(table=guest_train_data)
    host_reader = reader_0.get_party_instance(role='host', party_id=host)
    host_reader.algorithm_param(table=host_train_data)

    # DataIO: only the guest side is configured with labels.
    dataio_0 = DataIO(name="dataio_0")
    guest_dataio = dataio_0.get_party_instance(role='guest', party_id=guest)
    guest_dataio.algorithm_param(with_label=True, output_format="dense")
    host_dataio = dataio_0.get_party_instance(role='host', party_id=host)
    host_dataio.algorithm_param(with_label=False)

    hetero_ftl_0 = HeteroFTL(
        name='hetero_ftl_0',
        epochs=10,
        alpha=1,
        batch_size=-1,
        mode='plain',
    )
    kernel_init = initializers.RandomNormal(stddev=1.0, dtype="float32")
    bias_init = initializers.Zeros()
    dense_layer = Dense(
        units=32,
        activation='sigmoid',
        kernel_initializer=kernel_init,
        bias_initializer=bias_init,
    )
    hetero_ftl_0.add_nn_layer(dense_layer)
    hetero_ftl_0.compile(optimizer=optimizers.Adam(lr=0.01))

    evaluation_0 = Evaluation(name='evaluation_0', eval_type="binary")

    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(hetero_ftl_0, data=Data(train_data=dataio_0.output.data))
    pipeline.add_component(evaluation_0, data=Data(data=hetero_ftl_0.output.data))

    pipeline.compile()
    pipeline.fit(backend=backend, work_mode=work_mode)

    # Prediction: deploy the trained components and feed them from reader_0
    # through a separate pipeline.
    pipeline.deploy_component([dataio_0, hetero_ftl_0])

    predict_pipeline = PipeLine()
    predict_pipeline.add_component(reader_0)
    predict_source = {pipeline.dataio_0.input.data: reader_0.output.data}
    predict_pipeline.add_component(pipeline, data=Data(predict_input=predict_source))
    predict_pipeline.predict(backend=backend, work_mode=work_mode)
def main(config="../../config.yaml", namespace=""):
    """Fit a HeteroKmeans pipeline preceded by binning and feature selection.

    Component chain: reader -> DataIO -> intersection -> feature binning ->
    feature selection (using the binning model as isometric model) ->
    k-means -> clustering evaluation. Prints the k-means summary after the
    fit.

    Args:
        config: Path passed to ``load_job_config``, or an already loaded
            config object exposing ``parties``, ``backend`` and ``work_mode``.
        namespace: Suffix appended to ``"experiment"`` to build the table
            namespace.
    """
    if isinstance(config, str):
        config = load_job_config(config)

    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]
    backend = config.backend
    work_mode = config.work_mode

    table_namespace = f"experiment{namespace}"
    guest_train_data = {"name": "breast_hetero_guest", "namespace": table_namespace}
    host_train_data = {"name": "breast_hetero_host", "namespace": table_namespace}

    # Pipeline skeleton: initiator and participating roles.
    pipeline = PipeLine()
    pipeline.set_initiator(role='guest', party_id=guest)
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # Reader: one table per party.
    reader_0 = Reader(name="reader_0")
    guest_reader = reader_0.get_party_instance(role='guest', party_id=guest)
    guest_reader.component_param(table=guest_train_data)
    host_reader = reader_0.get_party_instance(role='host', party_id=host)
    host_reader.component_param(table=host_train_data)

    # DataIO: only the guest side is configured with labels.
    dataio_0 = DataIO(name="dataio_0")
    guest_dataio = dataio_0.get_party_instance(role='guest', party_id=guest)
    guest_dataio.component_param(with_label=True, output_format="dense")
    host_dataio = dataio_0.get_party_instance(role='host', party_id=host)
    host_dataio.component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")

    binning_settings = {
        "name": 'hetero_feature_binning_0',
        "method": 'optimal',
        "optimal_binning_param": {"metric_method": "iv"},
        "bin_indexes": -1,
    }
    hetero_feature_binning_0 = HeteroFeatureBinning(**binning_settings)

    selection_settings = {
        "name": 'hetero_feature_selection_0',
        "filter_methods": ["manually", "iv_filter"],
        "manually_param": {"filter_out_indexes": [1]},
        "iv_param": {
            "metrics": ["iv", "iv"],
            "filter_type": ["top_k", "threshold"],
            "take_high": [True, True],
            "threshold": [10, 0.001],
        },
        "select_col_indexes": -1,
    }
    hetero_feature_selection_0 = HeteroFeatureSelection(**selection_settings)

    kmeans_settings = {"k": 3, "max_iter": 10}
    hetero_kmeans_0 = HeteroKmeans(name='hetero_kmeans_0', **kmeans_settings)

    evaluation_0 = Evaluation(name='evaluation_0', eval_type='clustering')

    # Components are added in execution order.
    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=dataio_0.output.data))
    # Binning and selection both read the intersection output; selection also
    # takes the binning model as its isometric model.
    pipeline.add_component(hetero_feature_binning_0, data=Data(data=intersection_0.output.data))
    pipeline.add_component(
        hetero_feature_selection_0,
        data=Data(data=intersection_0.output.data),
        model=Model(isometric_model=hetero_feature_binning_0.output.model),
    )
    pipeline.add_component(
        hetero_kmeans_0,
        data=Data(train_data=hetero_feature_selection_0.output.data),
    )
    # Diagnostic output: shows the k-means output handle passed to evaluation.
    print(f"data: {hetero_kmeans_0.output.data.data[0]}")
    pipeline.add_component(evaluation_0, data=Data(data=hetero_kmeans_0.output.data.data[0]))

    # Compile once every component has been added, then fit.
    pipeline.compile()
    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)

    # Query and print the k-means component summary.
    kmeans_component = pipeline.get_component("hetero_kmeans_0")
    print(kmeans_component.get_summary())
def main(config="../../config.yaml", param="./xgb_config_binary.yaml", namespace=""):
    """Run the HeteroFastSecureBoost benchmark job and collect its metrics.

    Trains ``hetero_fast_sbt_0`` on the train tables (validating on the
    validate tables), applies the trained model to the validate tables via
    ``hetero_fast_sbt_1`` and evaluates the training output. Extra
    script-level metrics are computed from the two output data sets
    depending on ``param['eval_type']``.

    Args:
        config: Path passed to ``load_job_config``, or an already loaded
            config object exposing ``parties``, ``backend`` and ``work_mode``.
        param: Path passed to ``JobConfig.load_from_file``, or an already
            loaded mapping. Keys read here: ``data_guest_train``,
            ``data_host_train``, ``data_guest_val``, ``data_host_val``,
            ``tree_num``, ``task_type``, ``loss_func``, ``tree_depth``,
            ``learning_rate``, ``guest_depth``, ``host_depth``,
            ``tree_num_per_party``, ``work_mode`` and ``eval_type``.
        namespace: Suffix appended to ``"experiment"`` to build the table
            namespace.

    Returns:
        tuple: ``(data_summary, metric_summary)``. ``data_summary`` maps
        ``"train"`` and ``"test"`` to the guest/host table names used for
        that branch; ``metric_summary`` is the parsed evaluation summary
        plus the script-level metrics.
    """
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)

    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    backend = config.backend
    work_mode = config.work_mode

    # data sets
    guest_train_data = {"name": param['data_guest_train'], "namespace": f"experiment{namespace}"}
    host_train_data = {"name": param['data_host_train'], "namespace": f"experiment{namespace}"}
    guest_validate_data = {"name": param['data_guest_val'], "namespace": f"experiment{namespace}"}
    host_validate_data = {"name": param['data_host_val'], "namespace": f"experiment{namespace}"}

    # init pipeline
    pipeline = PipeLine().set_initiator(role="guest", party_id=guest).set_roles(guest=guest, host=host,)

    # set data reader and data-io
    reader_0, reader_1 = Reader(name="reader_0"), Reader(name="reader_1")
    reader_0.get_party_instance(role="guest", party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role="host", party_id=host).component_param(table=host_train_data)
    reader_1.get_party_instance(role="guest", party_id=guest).component_param(table=guest_validate_data)
    reader_1.get_party_instance(role="host", party_id=host).component_param(table=host_validate_data)

    dataio_0, dataio_1 = DataIO(name="dataio_0"), DataIO(name="dataio_1")
    dataio_0.get_party_instance(role="guest", party_id=guest).component_param(with_label=True, output_format="dense")
    dataio_0.get_party_instance(role="host", party_id=host).component_param(with_label=False)
    dataio_1.get_party_instance(role="guest", party_id=guest).component_param(with_label=True, output_format="dense")
    dataio_1.get_party_instance(role="host", party_id=host).component_param(with_label=False)

    # data intersect component
    intersect_0 = Intersection(name="intersection_0")
    intersect_1 = Intersection(name="intersection_1")

    # secure boost component
    hetero_fast_sbt_0 = HeteroFastSecureBoost(name="hetero_fast_sbt_0",
                                              num_trees=param['tree_num'],
                                              task_type=param['task_type'],
                                              objective_param={"objective": param['loss_func']},
                                              encrypt_param={"method": "iterativeAffine"},
                                              tree_param={"max_depth": param['tree_depth']},
                                              validation_freqs=1,
                                              subsample_feature_rate=1,
                                              learning_rate=param['learning_rate'],
                                              guest_depth=param['guest_depth'],
                                              host_depth=param['host_depth'],
                                              tree_num_per_party=param['tree_num_per_party'],
                                              work_mode=param['work_mode']
                                              )
    # Second instance only applies the trained model to the validate data.
    hetero_fast_sbt_1 = HeteroFastSecureBoost(name="hetero_fast_sbt_1")

    # evaluation component
    evaluation_0 = Evaluation(name="evaluation_0", eval_type=param['eval_type'])

    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(dataio_1, data=Data(data=reader_1.output.data), model=Model(dataio_0.output.model))
    pipeline.add_component(intersect_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(intersect_1, data=Data(data=dataio_1.output.data))
    pipeline.add_component(hetero_fast_sbt_0, data=Data(train_data=intersect_0.output.data,
                                                        validate_data=intersect_1.output.data))
    pipeline.add_component(hetero_fast_sbt_1, data=Data(test_data=intersect_1.output.data),
                           model=Model(hetero_fast_sbt_0.output.model))
    pipeline.add_component(evaluation_0, data=Data(data=hetero_fast_sbt_0.output.data))

    pipeline.compile()
    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)

    # Pull the prediction tables of both boosting components.
    sbt_0_data = pipeline.get_component("hetero_fast_sbt_0").get_output_data().get("data")
    sbt_1_data = pipeline.get_component("hetero_fast_sbt_1").get_output_data().get("data")
    sbt_0_score = extract_data(sbt_0_data, "predict_result")
    sbt_0_label = extract_data(sbt_0_data, "label")
    sbt_1_score = extract_data(sbt_1_data, "predict_result")
    sbt_1_label = extract_data(sbt_1_data, "label")
    sbt_0_score_label = extract_data(sbt_0_data, "predict_result", keep_id=True)
    sbt_1_score_label = extract_data(sbt_1_data, "predict_result", keep_id=True)

    metric_summary = parse_summary_result(pipeline.get_component("evaluation_0").get_summary())
    if param['eval_type'] == "regression":
        desc_sbt_0 = regression_metric.Describe().compute(sbt_0_score)
        desc_sbt_1 = regression_metric.Describe().compute(sbt_1_score)
        metric_summary["script_metrics"] = {"hetero_fast_sbt_train": desc_sbt_0,
                                            "hetero_fast_sbt_validate": desc_sbt_1}
    elif param['eval_type'] == "binary":
        metric_sbt = {
            "score_diversity_ratio": classification_metric.Distribution.compute(sbt_0_score_label, sbt_1_score_label),
            "ks_2samp": classification_metric.KSTest.compute(sbt_0_score, sbt_1_score),
            "mAP_D_value": classification_metric.AveragePrecisionScore().compute(sbt_0_score, sbt_1_score, sbt_0_label,
                                                                                 sbt_1_label)}
        metric_summary["distribution_metrics"] = {"hetero_fast_sbt": metric_sbt}
    elif param['eval_type'] == "multi":
        metric_sbt = {
            "score_diversity_ratio": classification_metric.Distribution.compute(sbt_0_score_label, sbt_1_score_label)}
        metric_summary["distribution_metrics"] = {"hetero_fast_sbt": metric_sbt}

    # The "test" entry names the validate tables read by reader_1; it
    # previously repeated the training table names.
    data_summary = {"train": {"guest": guest_train_data["name"], "host": host_train_data["name"]},
                    "test": {"guest": guest_validate_data["name"], "host": host_validate_data["name"]}
                    }
    return data_summary, metric_summary
def make_normal_dsl(config, namespace, lr_param, is_multi_host=False,
                    has_validate=False, is_cv=False, is_ovr=False,
                    is_dense=True, need_evaluation=True):
    """Build and compile a reader -> DataIO -> intersection -> HeteroLR pipeline.

    Args:
        config: Loaded config object exposing ``parties``.
        namespace: Suffix appended to ``"experiment"`` to build the table
            namespace.
        lr_param: Keyword arguments forwarded to ``HeteroLR``.
        is_multi_host: Use the whole ``parties.host`` value as host id
            instead of its first element.
        has_validate: Add a second reader plus ``*_1`` copies of the
            preprocessing components, feed them to ``hetero_lr_0`` as
            validate data and add a ``hetero_lr_1`` test component.
        is_cv: Compile and return right after adding ``hetero_lr_0``.
        is_ovr: Use the ``vehicle_scale_hetero`` tables instead of the
            ``breast_hetero`` ones.
        is_dense: Select ``'dense'`` rather than ``'sparse'`` DataIO output.
        need_evaluation: Add a binary Evaluation component.

    Returns:
        The compiled pipeline.
    """
    parties = config.parties
    guest = parties.guest[0]
    hosts = parties.host if is_multi_host else parties.host[0]
    arbiter = parties.arbiter[0]

    if is_ovr:
        guest_table_name = "vehicle_scale_hetero_guest"
        host_table_name = "vehicle_scale_hetero_host"
    else:
        guest_table_name = "breast_hetero_guest"
        host_table_name = "breast_hetero_host"

    table_namespace = f"experiment{namespace}"
    # Train and eval branches read the same tables, but keep separate dicts.
    guest_train_data = {"name": guest_table_name, "namespace": table_namespace}
    host_train_data = {"name": host_table_name, "namespace": table_namespace}
    guest_eval_data = {"name": guest_table_name, "namespace": table_namespace}
    host_eval_data = {"name": host_table_name, "namespace": table_namespace}

    # Pipeline skeleton: initiator and participating roles.
    pipeline = PipeLine()
    pipeline.set_initiator(role='guest', party_id=guest)
    pipeline.set_roles(guest=guest, host=hosts, arbiter=arbiter)

    # Reader: one table per party.
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=hosts).component_param(table=host_train_data)

    # DataIO: output format is set for all parties; only the guest has labels.
    dataio_0 = DataIO(name="dataio_0", output_format='dense' if is_dense else 'sparse')
    dataio_0.get_party_instance(role='guest', party_id=guest).component_param(with_label=True)
    dataio_0.get_party_instance(role='host', party_id=hosts).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")

    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=dataio_0.output.data))

    # Preprocessing components that get mirrored on the validate branch.
    train_line = [dataio_0, intersection_0]

    last_cpn = None
    if has_validate:
        reader_1 = Reader(name="reader_1")
        reader_1.get_party_instance(role='guest', party_id=guest).component_param(table=guest_eval_data)
        reader_1.get_party_instance(role='host', party_id=hosts).component_param(table=host_eval_data)
        pipeline.add_component(reader_1)
        last_cpn = reader_1

        for train_cpn in train_line:
            # Same component type, with the trailing name segment replaced by "1".
            mirror_name = "_".join([*train_cpn.name.split('_')[:-1], '1'])
            validate_cpn = type(train_cpn)(name=mirror_name)
            inputs = {"data": Data(data=last_cpn.output.data)}
            if hasattr(train_cpn.output, "model"):
                inputs["model"] = Model(train_cpn.output.model)
            pipeline.add_component(validate_cpn, **inputs)
            last_cpn = validate_cpn

    hetero_lr_0 = HeteroLR(**lr_param)
    lr_inputs = {"train_data": intersection_0.output.data}
    if has_validate:
        lr_inputs["validate_data"] = last_cpn.output.data
    pipeline.add_component(hetero_lr_0, data=Data(**lr_inputs))

    if is_cv:
        pipeline.compile()
        return pipeline

    evaluation_data = [hetero_lr_0.output.data]
    if has_validate:
        hetero_lr_1 = HeteroLR(name='hetero_lr_1')
        pipeline.add_component(
            hetero_lr_1,
            data=Data(test_data=last_cpn.output.data),
            model=Model(hetero_lr_0.output.model),
        )
        evaluation_data.append(hetero_lr_1.output.data)

    if need_evaluation:
        evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")
        pipeline.add_component(evaluation_0, data=Data(data=evaluation_data))

    pipeline.compile()
    return pipeline
def make_feature_engineering_dsl(config, namespace, lr_param, is_multi_host=False,
                                 has_validate=False, is_cv=False, is_ovr=False):
    """Build and compile a HeteroLR pipeline with a feature-engineering chain.

    Component chain: reader -> DataIO -> intersection -> feature scale ->
    feature binning -> feature selection -> one-hot encoder -> HeteroLR ->
    binary evaluation.

    Args:
        config: Loaded config object exposing ``parties``.
        namespace: Suffix appended to ``"experiment"`` to build the table
            namespace.
        lr_param: Keyword arguments forwarded to ``HeteroLR``.
        is_multi_host: Use the whole ``parties.host`` value as host id
            instead of its first element.
        has_validate: Add a second reader plus ``*_1`` copies of every
            preprocessing component, feed them to ``hetero_lr_0`` as
            validate data and add a ``hetero_lr_1`` test component.
        is_cv: Compile and return right after adding ``hetero_lr_0``.
        is_ovr: Use the ``vehicle_scale_hetero`` tables instead of the
            ``breast_hetero`` ones.

    Returns:
        The compiled pipeline.
    """
    parties = config.parties
    guest = parties.guest[0]
    hosts = parties.host if is_multi_host else parties.host[0]
    arbiter = parties.arbiter[0]

    if is_ovr:
        guest_table_name = "vehicle_scale_hetero_guest"
        host_table_name = "vehicle_scale_hetero_host"
    else:
        guest_table_name = "breast_hetero_guest"
        host_table_name = "breast_hetero_host"

    table_namespace = f"experiment{namespace}"
    # Train and eval branches read the same tables, but keep separate dicts.
    guest_train_data = {"name": guest_table_name, "namespace": table_namespace}
    host_train_data = {"name": host_table_name, "namespace": table_namespace}
    guest_eval_data = {"name": guest_table_name, "namespace": table_namespace}
    host_eval_data = {"name": host_table_name, "namespace": table_namespace}

    # Pipeline skeleton: initiator and participating roles.
    pipeline = PipeLine()
    pipeline.set_initiator(role='guest', party_id=guest)
    pipeline.set_roles(guest=guest, host=hosts, arbiter=arbiter)

    # Reader: one table per party.
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=hosts).component_param(table=host_train_data)

    # DataIO: only the guest side is configured with labels.
    dataio_0 = DataIO(name="dataio_0")
    guest_dataio = dataio_0.get_party_instance(role='guest', party_id=guest)
    guest_dataio.component_param(with_label=True, output_format="dense")
    dataio_0.get_party_instance(role='host', party_id=hosts).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")

    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=dataio_0.output.data))

    # Feature scaling.
    feature_scale_0 = FeatureScale(name='feature_scale_0', method="standard_scale", need_run=True)
    pipeline.add_component(feature_scale_0, data=Data(data=intersection_0.output.data))

    # Feature binning.
    binning_param = {
        "method": "quantile",
        "compress_thres": 10000,
        "head_size": 10000,
        "error": 0.001,
        "bin_num": 10,
        "bin_indexes": -1,
        "adjustment_factor": 0.5,
        "local_only": False,
        "need_run": True,
        "transform_param": {
            "transform_cols": -1,
            "transform_type": "bin_num",
        },
    }
    hetero_feature_binning_0 = HeteroFeatureBinning(name='hetero_feature_binning_0', **binning_param)
    pipeline.add_component(hetero_feature_binning_0, data=Data(data=feature_scale_0.output.data))

    # Feature selection, with the binning model as isometric model.
    selection_param = {
        "select_col_indexes": -1,
        "filter_methods": ["manually", "iv_value_thres", "iv_percentile"],
        "manually_param": {"filter_out_indexes": None},
        "iv_value_param": {"value_threshold": 1.0},
        "iv_percentile_param": {"percentile_threshold": 0.9},
        "need_run": True,
    }
    hetero_feature_selection_0 = HeteroFeatureSelection(name='hetero_feature_selection_0', **selection_param)
    pipeline.add_component(
        hetero_feature_selection_0,
        data=Data(data=hetero_feature_binning_0.output.data),
        model=Model(isometric_model=[hetero_feature_binning_0.output.model]),
    )

    # One-hot encoding.
    onehot_param = {
        "transform_col_indexes": -1,
        "transform_col_names": None,
        "need_run": True,
    }
    one_hot_encoder_0 = OneHotEncoder(name='one_hot_encoder_0', **onehot_param)
    pipeline.add_component(one_hot_encoder_0, data=Data(data=hetero_feature_selection_0.output.data))

    # Preprocessing components that get mirrored on the validate branch,
    # in execution order.
    train_line = [
        dataio_0,
        intersection_0,
        feature_scale_0,
        hetero_feature_binning_0,
        hetero_feature_selection_0,
        one_hot_encoder_0,
    ]

    last_cpn = None
    if has_validate:
        reader_1 = Reader(name="reader_1")
        reader_1.get_party_instance(role='guest', party_id=guest).component_param(table=guest_eval_data)
        reader_1.get_party_instance(role='host', party_id=hosts).component_param(table=host_eval_data)
        pipeline.add_component(reader_1)
        last_cpn = reader_1

        for train_cpn in train_line:
            # Same component type, with the trailing name segment replaced by "1".
            mirror_name = "_".join([*train_cpn.name.split('_')[:-1], '1'])
            validate_cpn = type(train_cpn)(name=mirror_name)
            inputs = {"data": Data(data=last_cpn.output.data)}
            if hasattr(train_cpn.output, "model"):
                inputs["model"] = Model(train_cpn.output.model)
            pipeline.add_component(validate_cpn, **inputs)
            last_cpn = validate_cpn

    hetero_lr_0 = HeteroLR(**lr_param)
    lr_inputs = {"train_data": one_hot_encoder_0.output.data}
    if has_validate:
        lr_inputs["validate_data"] = last_cpn.output.data
    pipeline.add_component(hetero_lr_0, data=Data(**lr_inputs))

    if is_cv:
        pipeline.compile()
        return pipeline

    evaluation_data = [hetero_lr_0.output.data]
    if has_validate:
        hetero_lr_1 = HeteroLR(name='hetero_lr_1')
        pipeline.add_component(
            hetero_lr_1,
            data=Data(test_data=last_cpn.output.data),
            model=Model(hetero_lr_0.output.model),
        )
        evaluation_data.append(hetero_lr_1.output.data)

    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")
    pipeline.add_component(evaluation_0, data=Data(data=evaluation_data))

    pipeline.compile()
    return pipeline
def make_normal_dsl(config, namespace):
    """Build and compile a HomoLR pipeline with SBT-based feature selection.

    Component chain: reader -> DataIO -> feature scale -> HomoSecureBoost;
    a feature selection component then filters the scaled data using the
    boosting model as isometric model, and its output trains HomoLR, which
    is followed by an Evaluation component.

    Args:
        config: Loaded config object exposing ``parties``.
        namespace: Suffix appended to ``"experiment"`` to build the table
            namespace.

    Returns:
        The compiled pipeline.
    """
    parties = config.parties
    guest = parties.guest[0]
    hosts = parties.host[0]
    arbiter = parties.arbiter[0]

    table_namespace = f"experiment{namespace}"
    guest_train_data = {"name": "breast_homo_guest", "namespace": table_namespace}
    host_train_data = {"name": "breast_homo_host", "namespace": table_namespace}

    # Pipeline skeleton: initiator and participating roles.
    pipeline = PipeLine()
    pipeline.set_initiator(role='guest', party_id=guest)
    pipeline.set_roles(guest=guest, host=hosts, arbiter=arbiter)

    # Reader: one table per party.
    reader_0 = Reader(name="reader_0")
    guest_reader = reader_0.get_party_instance(role='guest', party_id=guest)
    guest_reader.component_param(table=guest_train_data)
    host_reader = reader_0.get_party_instance(role='host', party_id=hosts)
    host_reader.component_param(table=host_train_data)

    # DataIO: both parties are configured with labels.
    dataio_0 = DataIO(name="dataio_0")
    guest_dataio = dataio_0.get_party_instance(role='guest', party_id=guest)
    guest_dataio.component_param(with_label=True, output_format="dense")
    host_dataio = dataio_0.get_party_instance(role='host', party_id=hosts)
    host_dataio.component_param(with_label=True)

    scale_0 = FeatureScale(name='scale_0')

    homo_sbt_0 = HomoSecureBoost(
        name="homo_secureboost_0",
        num_trees=3,
        task_type='classification',
        objective_param={"objective": "cross_entropy"},
        tree_param={"max_depth": 3},
        validation_freqs=1,
    )

    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(scale_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(homo_sbt_0, data=Data(train_data=scale_0.output.data))

    selection_param = {
        "name": "hetero_feature_selection_0",
        "select_col_indexes": -1,
        "select_names": [],
        "filter_methods": ["homo_sbt_filter"],
        "sbt_param": {
            "metrics": "feature_importance",
            "filter_type": "threshold",
            "take_high": True,
            "threshold": 0.03,
        },
    }
    feature_selection_0 = HeteroFeatureSelection(**selection_param)

    lr_settings = {
        "penalty": "L2",
        "optimizer": "sgd",
        "tol": 1e-05,
        "alpha": 0.01,
        "max_iter": 30,
        "early_stop": "diff",
        "batch_size": -1,
        "learning_rate": 0.15,
        "decay": 1,
        "decay_sqrt": True,
        "init_param": {"init_method": "zeros"},
        "encrypt_param": {"method": None},
        "cv_param": {
            "n_splits": 4,
            "shuffle": True,
            "random_seed": 33,
            "need_cv": False,
        },
    }
    homo_lr_0 = HomoLR(name='homo_lr_0', **lr_settings)

    # Selection filters the scaled data using the boosting model.
    pipeline.add_component(
        feature_selection_0,
        data=Data(data=scale_0.output.data),
        model=Model(isometric_model=homo_sbt_0.output.model),
    )
    pipeline.add_component(homo_lr_0, data=Data(train_data=feature_selection_0.output.data))

    evaluation_0 = Evaluation(name='evaluation_0')
    pipeline.add_component(evaluation_0, data=Data(data=homo_lr_0.output.data))

    # Compile once every component has been added.
    pipeline.compile()
    return pipeline
def main(config="../../config.yaml", namespace=""):
    """Run hetero k-means with one guest and two hosts, then print a summary.

    Pipeline: reader -> DataIO -> Intersection -> HeteroKmeans -> Evaluation
    (clustering).

    Args:
        config: path to a job config file (loaded with ``load_job_config``)
            or an already loaded config exposing ``parties``, ``backend``
            and ``work_mode``.
        namespace: suffix appended to ``"experiment"`` to form the table
            namespace.
    """
    if isinstance(config, str):
        config = load_job_config(config)

    roles = config.parties
    guest_id = roles.guest[0]
    host_ids = roles.host
    arbiter_id = roles.arbiter[0]
    backend = config.backend
    work_mode = config.work_mode

    table_namespace = f"experiment{namespace}"
    guest_table = {"name": "breast_hetero_guest",
                   "namespace": table_namespace}
    host_tables = [
        {"name": "breast_hetero_host", "namespace": table_namespace},
        {"name": "breast_hetero_host", "namespace": table_namespace},
    ]

    # Pipeline skeleton: the guest initiates the job.
    pipeline = PipeLine()
    pipeline.set_initiator(role="guest", party_id=guest_id)
    pipeline.set_roles(guest=guest_id, host=host_ids, arbiter=arbiter_id)

    # Reader: guest first, then the first two hosts with their own table.
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(
        role="guest", party_id=guest_id).algorithm_param(table=guest_table)
    for idx in (0, 1):
        reader_0.get_party_instance(
            role="host", party_id=host_ids[idx]).algorithm_param(
                table=host_tables[idx])

    # DataIO: only the guest holds labels.
    dataio_0 = DataIO(name="dataio_0")
    dataio_0.get_party_instance(
        role="guest", party_id=guest_id).algorithm_param(
            with_label=True, output_format="dense")
    for idx in (0, 1):
        dataio_0.get_party_instance(
            role="host", party_id=host_ids[idx]).algorithm_param(
                with_label=False)

    intersection_0 = Intersection(name="intersection_0")
    hetero_kmeans_0 = HeteroKmeans(name="hetero_kmeans_0", k=3, max_iter=10)
    evaluation_0 = Evaluation(name="evaluation_0", eval_type="clustering")

    # Components are added in execution order.
    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0,
                           data=Data(data=dataio_0.output.data))
    pipeline.add_component(
        hetero_kmeans_0, data=Data(train_data=intersection_0.output.data))
    # Evaluation consumes the first entry of the k-means data output.
    pipeline.add_component(
        evaluation_0, data=Data(data=hetero_kmeans_0.output.data.data[0]))

    # Compiling forms the conf and dsl files used to run the job.
    pipeline.compile()
    pipeline.fit(backend=backend, work_mode=work_mode)

    print(pipeline.get_component("hetero_kmeans_0").get_summary())
def main(config="../../config.yaml", namespace=""):
    """Train hetero SSHE-LR with a validation set, then score with a copy.

    ``hetero_sshe_lr_0`` is trained on the intersected training data with the
    intersected evaluation data as validation input. ``hetero_sshe_lr_1``
    reuses its model on the evaluation data. Both outputs are passed to a
    binary evaluation component.

    Args:
        config: path to a job config file (loaded with ``load_job_config``)
            or an already loaded config exposing ``parties``.
        namespace: suffix appended to ``"experiment"`` to form the table
            namespace.

    Returns:
        The fitted ``PipeLine``.
    """
    if isinstance(config, str):
        config = load_job_config(config)

    roles = config.parties
    guest_id = roles.guest[0]
    host_id = roles.host[0]

    table_namespace = f"experiment{namespace}"
    guest_train_table = {"name": "breast_hetero_guest",
                         "namespace": table_namespace}
    host_train_table = {"name": "breast_hetero_host",
                        "namespace": table_namespace}
    guest_eval_table = {"name": "breast_hetero_guest",
                        "namespace": table_namespace}
    host_eval_table = {"name": "breast_hetero_host",
                       "namespace": table_namespace}

    # Pipeline skeleton: the guest initiates the job; no arbiter is set.
    pipeline = PipeLine()
    pipeline.set_initiator(role="guest", party_id=guest_id)
    pipeline.set_roles(guest=guest_id, host=host_id)

    # reader_0 feeds training data, reader_1 feeds evaluation data.
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(
        role="guest", party_id=guest_id).component_param(
            table=guest_train_table)
    reader_0.get_party_instance(
        role="host", party_id=host_id).component_param(
            table=host_train_table)

    reader_1 = Reader(name="reader_1")
    reader_1.get_party_instance(
        role="guest", party_id=guest_id).component_param(
            table=guest_eval_table)
    reader_1.get_party_instance(
        role="host", party_id=host_id).component_param(
            table=host_eval_table)

    data_transform_0 = DataTransform(name="data_transform_0",
                                     output_format="dense")
    data_transform_1 = DataTransform(name="data_transform_1",
                                     output_format="dense")

    # Only data_transform_0 gets per-party settings; data_transform_1 is
    # added below with data_transform_0's model.
    data_transform_0.get_party_instance(
        role="guest", party_id=guest_id).component_param(with_label=True)
    data_transform_0.get_party_instance(
        role="host", party_id=host_id).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")
    intersection_1 = Intersection(name="intersection_1")

    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(data_transform_0,
                           data=Data(data=reader_0.output.data))
    pipeline.add_component(data_transform_1,
                           data=Data(data=reader_1.output.data),
                           model=Model(data_transform_0.output.model))
    pipeline.add_component(intersection_0,
                           data=Data(data=data_transform_0.output.data))
    pipeline.add_component(intersection_1,
                           data=Data(data=data_transform_1.output.data))

    hetero_sshe_lr_0 = HeteroSSHELR(
        name="hetero_sshe_lr_0",
        penalty="L2",
        optimizer="rmsprop",
        tol=0.0001,
        alpha=0.01,
        max_iter=30,
        early_stop="diff",
        batch_size=-1,
        callback_param={
            "callbacks": ["EarlyStopping", "PerformanceEvaluate"],
            "validation_freqs": 1,
            "early_stopping_rounds": 3,
        },
        learning_rate=0.15,
        init_param={"init_method": "zeros"},
        reveal_strategy="respectively",
        reveal_every_iter=True,
    )
    pipeline.add_component(
        hetero_sshe_lr_0,
        data=Data(train_data=intersection_0.output.data,
                  validate_data=intersection_1.output.data))
    outputs_to_evaluate = [hetero_sshe_lr_0.output.data]

    # Second LR instance only predicts, using the trained model.
    hetero_sshe_lr_1 = HeteroSSHELR(name="hetero_sshe_lr_1")
    pipeline.add_component(
        hetero_sshe_lr_1,
        data=Data(test_data=intersection_1.output.data),
        model=Model(hetero_sshe_lr_0.output.model))
    outputs_to_evaluate.append(hetero_sshe_lr_1.output.data)

    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")
    pipeline.add_component(evaluation_0,
                           data=Data(data=outputs_to_evaluate))

    pipeline.compile()
    pipeline.fit()

    for component_name in ("hetero_sshe_lr_0", "evaluation_0"):
        prettify(pipeline.get_component(component_name).get_summary())

    return pipeline
def main(config="../../config.yaml", namespace=""):
    """Fit hetero fast SecureBoost ('mix' mode) with one guest and two hosts.

    Pipeline: two readers -> two DataTransforms -> two Intersections ->
    HeteroFastSecureBoost (train + validate) -> binary Evaluation. The
    boosting component summary is printed after fitting.

    Args:
        config: path to a job config file (loaded with ``load_job_config``)
            or an already loaded config exposing ``parties``.
        namespace: suffix appended to ``"experiment"`` to form the table
            namespace.
    """
    if isinstance(config, str):
        config = load_job_config(config)

    roles = config.parties
    guest = roles.guest[0]
    host_0 = roles.host[0]
    host_1 = roles.host[1]
    host_ids = (host_0, host_1)

    table_namespace = f"experiment{namespace}"

    def _table(name):
        # Build a fresh table descriptor in the experiment namespace.
        return {"name": name, "namespace": table_namespace}

    guest_train_data = _table("breast_hetero_guest")
    host_train_data_0 = _table("breast_hetero_host")
    host_train_data_1 = _table("breast_hetero_host")

    guest_validate_data = _table("breast_hetero_guest")
    host_validate_data_0 = _table("breast_hetero_host")
    host_validate_data_1 = _table("breast_hetero_host")

    pipeline = PipeLine().set_initiator(
        role="guest", party_id=guest).set_roles(
            guest=guest, host=[host_0, host_1])

    # Readers: reader_0 feeds training data, reader_1 validation data.
    reader_0, reader_1 = Reader(name="reader_0"), Reader(name="reader_1")
    reader_plan = (
        (reader_0, guest_train_data,
         (host_train_data_0, host_train_data_1)),
        (reader_1, guest_validate_data,
         (host_validate_data_0, host_validate_data_1)),
    )
    for reader, guest_table, host_tables in reader_plan:
        reader.get_party_instance(
            role="guest", party_id=guest).component_param(table=guest_table)
        for host_id, host_table in zip(host_ids, host_tables):
            reader.get_party_instance(
                role="host", party_id=host_id).component_param(
                    table=host_table)

    # DataTransform: only the guest carries labels.
    data_transform_0, data_transform_1 = DataTransform(
        name="data_transform_0"), DataTransform(name="data_transform_1")
    for transform in (data_transform_0, data_transform_1):
        transform.get_party_instance(
            role="guest", party_id=guest).component_param(
                with_label=True, output_format="dense")
        for host_id in host_ids:
            transform.get_party_instance(
                role="host", party_id=host_id).component_param(
                    with_label=False)

    intersect_0 = Intersection(name="intersection_0")
    intersect_1 = Intersection(name="intersection_1")

    hetero_fast_secure_boost_0 = HeteroFastSecureBoost(
        name="hetero_fast_secure_boost_0",
        num_trees=3,
        tree_num_per_party=1,
        task_type="classification",
        objective_param={"objective": "cross_entropy"},
        encrypt_param={"method": "Paillier"},
        tree_param={"max_depth": 3},
        validation_freqs=1,
        work_mode="mix",
    )

    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")

    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(data_transform_0,
                           data=Data(data=reader_0.output.data))
    # The validation transform reuses the training transform's model.
    pipeline.add_component(data_transform_1,
                           data=Data(data=reader_1.output.data),
                           model=Model(data_transform_0.output.model))
    pipeline.add_component(intersect_0,
                           data=Data(data=data_transform_0.output.data))
    pipeline.add_component(intersect_1,
                           data=Data(data=data_transform_1.output.data))
    pipeline.add_component(
        hetero_fast_secure_boost_0,
        data=Data(train_data=intersect_0.output.data,
                  validate_data=intersect_1.output.data))
    pipeline.add_component(
        evaluation_0,
        data=Data(data=hetero_fast_secure_boost_0.output.data))

    pipeline.compile()
    pipeline.fit()

    print("fitting hetero secureboost done, result:")
    print(pipeline.get_component("hetero_fast_secure_boost_0").get_summary())
def main(config="../../config.yaml", param='./xgb_config_binary.yaml',
         namespace=""):
    """Run the homo SecureBoost benchmark job described by ``param``.

    Pipeline: reader_0/dataio_0 (train) and reader_1/dataio_1 (validate,
    reusing dataio_0's model) -> HomoSecureBoost -> Evaluation.

    Args:
        config: path to a job config file (loaded with ``load_job_config``)
            or an already loaded config exposing ``parties``, ``backend``
            and ``work_mode``.
        param: path to a benchmark parameter file (loaded with
            ``JobConfig.load_from_file``) or an already loaded mapping. The
            keys read are ``data_guest_train``, ``data_guest_val``,
            ``data_host_train``, ``data_host_val``, ``tree_num``,
            ``task_type``, ``loss_func``, ``tree_depth``, ``learning_rate``
            and ``eval_type``.
        namespace: suffix appended to ``"experiment"`` to form the table
            namespace.

    Returns:
        A ``(data_summary, evaluation_summary)`` tuple. ``data_summary``
        maps ``"train"`` / ``"test"`` to the guest and host table names that
        were read for training and validation respectively.
    """
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)

    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]
    backend = config.backend
    work_mode = config.work_mode

    guest_train_data = {
        "name": param['data_guest_train'],
        "namespace": f"experiment{namespace}"
    }
    guest_validate_data = {
        "name": param['data_guest_val'],
        "namespace": f"experiment{namespace}"
    }

    host_train_data = {
        "name": param['data_host_train'],
        "namespace": f"experiment{namespace}"
    }
    host_validate_data = {
        "name": param['data_host_val'],
        "namespace": f"experiment{namespace}"
    }

    pipeline = PipeLine().set_initiator(
        role='guest', party_id=guest).set_roles(guest=guest, host=host,
                                                arbiter=arbiter)

    dataio_0, dataio_1 = DataIO(name="dataio_0"), DataIO(name='dataio_1')
    reader_0, reader_1 = Reader(name="reader_0"), Reader(name='reader_1')

    # Training inputs: in this homo setting both parties carry labels.
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)
    dataio_0.get_party_instance(role='guest', party_id=guest).component_param(
        with_label=True, output_format="dense")
    dataio_0.get_party_instance(role='host', party_id=host).component_param(
        with_label=True, output_format="dense")

    # Validation inputs, configured the same way.
    reader_1.get_party_instance(
        role='guest',
        party_id=guest).component_param(table=guest_validate_data)
    reader_1.get_party_instance(
        role='host', party_id=host).component_param(table=host_validate_data)
    dataio_1.get_party_instance(role='guest', party_id=guest).component_param(
        with_label=True, output_format="dense")
    dataio_1.get_party_instance(role='host', party_id=host).component_param(
        with_label=True, output_format="dense")

    homo_secureboost_0 = HomoSecureBoost(
        name="homo_secureboost_0",
        num_trees=param['tree_num'],
        task_type=param['task_type'],
        objective_param={"objective": param['loss_func']},
        tree_param={"max_depth": param['tree_depth']},
        validation_freqs=1,
        subsample_feature_rate=1,
        learning_rate=param['learning_rate'],
        bin_num=50)

    evaluation_0 = Evaluation(name='evaluation_0',
                              eval_type=param['eval_type'])

    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(reader_1)
    # dataio_1 reuses the model produced by dataio_0.
    pipeline.add_component(dataio_1,
                           data=Data(data=reader_1.output.data),
                           model=Model(dataio_0.output.model))
    pipeline.add_component(homo_secureboost_0,
                           data=Data(train_data=dataio_0.output.data,
                                     validate_data=dataio_1.output.data))
    pipeline.add_component(evaluation_0,
                           data=Data(homo_secureboost_0.output.data))

    pipeline.compile()
    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)

    # Report the tables that were actually read: the "test" entry names the
    # validation tables fed to reader_1, not the training tables again.
    data_summary = {
        "train": {
            "guest": guest_train_data["name"],
            "host": host_train_data["name"]
        },
        "test": {
            "guest": guest_validate_data["name"],
            "host": host_validate_data["name"]
        }
    }
    return data_summary, pipeline.get_component('evaluation_0').get_summary()
def main(config="../../config.yaml", namespace=""):
    """Train homo LR on one-hot encoded, scaled data and evaluate it.

    Training branch: reader_0 -> dataio_0 -> homo_onehot_0 -> scale_0 ->
    homo_lr_0. The evaluation branch (reader_1 -> dataio_1 -> homo_onehot_1
    -> scale_1 -> homo_lr_1) takes the model of the matching training
    component at every step. Both LR outputs go to a binary evaluation, which
    is switched off for the host.

    Args:
        config: path to a job config file (loaded with ``load_job_config``)
            or an already loaded config exposing ``parties``, ``backend``
            and ``work_mode``.
        namespace: suffix appended to ``"experiment"`` to form the table
            namespace.
    """
    if isinstance(config, str):
        config = load_job_config(config)

    roles = config.parties
    guest_id = roles.guest[0]
    host_id = roles.host[0]
    arbiter_id = roles.arbiter[0]
    backend = config.backend
    work_mode = config.work_mode

    table_namespace = f"experiment{namespace}"
    guest_train_table = {"name": "heart_nonscaled_hetero_guest",
                         "namespace": table_namespace}
    host_train_table = {"name": "heart_nonscaled_hetero_host",
                        "namespace": table_namespace}
    guest_eval_table = {"name": "heart_nonscaled_hetero_test",
                        "namespace": table_namespace}
    host_eval_table = {"name": "heart_nonscaled_hetero_test",
                       "namespace": table_namespace}

    # Pipeline skeleton: the guest initiates the job.
    pipeline = PipeLine()
    pipeline.set_initiator(role="guest", party_id=guest_id)
    pipeline.set_roles(guest=guest_id, host=host_id, arbiter=arbiter_id)

    # reader_0 feeds training data, reader_1 feeds evaluation data.
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(
        role="guest", party_id=guest_id).algorithm_param(
            table=guest_train_table)
    reader_0.get_party_instance(
        role="host", party_id=host_id).algorithm_param(
            table=host_train_table)

    reader_1 = Reader(name="reader_1")
    reader_1.get_party_instance(
        role="guest", party_id=guest_id).algorithm_param(
            table=guest_eval_table)
    reader_1.get_party_instance(
        role="host", party_id=host_id).algorithm_param(
            table=host_eval_table)

    # Only the "_0" components are parameterised; each "_1" twin is added
    # further down with the model of its "_0" counterpart.
    dataio_0 = DataIO(name="dataio_0", with_label=True,
                      output_format="dense", label_name="target")
    dataio_1 = DataIO(name="dataio_1")

    homo_onehot_0 = HomoOneHotEncoder(
        name="homo_onehot_0",
        transform_col_indexes=[1, 2, 5, 6, 8, 10, 11, 12],
        transform_col_names=[],
        need_alignment=True,
    )
    homo_onehot_1 = HomoOneHotEncoder(name="homo_onehot_1")

    scale_0 = FeatureScale(name="scale_0", method="standard_scale")
    scale_1 = FeatureScale(name="scale_1")

    homo_lr_0 = HomoLR(
        name="homo_lr_0",
        penalty="L2",
        optimizer="sgd",
        tol=1e-05,
        alpha=0.01,
        max_iter=3,
        early_stop="diff",
        batch_size=500,
        learning_rate=0.15,
        decay=1,
        decay_sqrt=True,
        init_param={"init_method": "zeros"},
        encrypt_param={"method": "Paillier"},
        cv_param={
            "n_splits": 4,
            "shuffle": True,
            "random_seed": 33,
            "need_cv": False,
        },
    )
    homo_lr_1 = HomoLR(name="homo_lr_1")

    # Components are added in execution order.
    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(dataio_1,
                           data=Data(data=reader_1.output.data),
                           model=Model(dataio_0.output.model))
    pipeline.add_component(homo_onehot_0,
                           data=Data(data=dataio_0.output.data))
    pipeline.add_component(homo_onehot_1,
                           data=Data(data=dataio_1.output.data),
                           model=Model(homo_onehot_0.output.model))
    pipeline.add_component(scale_0,
                           data=Data(data=homo_onehot_0.output.data))
    pipeline.add_component(scale_1,
                           data=Data(data=homo_onehot_1.output.data),
                           model=Model(scale_0.output.model))
    pipeline.add_component(homo_lr_0,
                           data=Data(train_data=scale_0.output.data))
    pipeline.add_component(homo_lr_1,
                           data=Data(test_data=scale_1.output.data),
                           model=Model(homo_lr_0.output.model))

    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")
    # The host does not run the evaluation component.
    evaluation_0.get_party_instance(
        role="host", party_id=host_id).algorithm_param(need_run=False)
    pipeline.add_component(
        evaluation_0,
        data=Data(data=[homo_lr_0.output.data, homo_lr_1.output.data]))

    pipeline.compile()
    pipeline.fit(backend=backend, work_mode=work_mode)

    # Dump the summaries of the trained model and of the evaluation.
    for component_name in ("homo_lr_0", "evaluation_0"):
        summary = pipeline.get_component(component_name).get_summary()
        print(json.dumps(summary, indent=4, ensure_ascii=False))
def main(config="../../config.yaml", param='./xgb_config_binary.yaml',
         namespace=""):
    """Run the homo SecureBoost benchmark and collect extra script metrics.

    ``homo_secureboost_0`` is trained with validation data and
    ``homo_secureboost_1`` applies its model to the validation data. After
    fitting, the output data of both components is compared with metrics
    chosen by ``param['eval_type']`` and merged into the parsed evaluation
    summary.

    Args:
        config: path to a job config file (loaded with ``load_job_config``)
            or an already loaded config exposing ``parties``.
        param: path to a benchmark parameter file (loaded with
            ``JobConfig.load_from_file``) or an already loaded mapping. The
            keys read are ``data_guest_train``, ``data_guest_val``,
            ``data_host_train``, ``data_host_val``, ``tree_num``,
            ``task_type``, ``loss_func``, ``tree_depth``, ``learning_rate``
            and ``eval_type``.
        namespace: suffix appended to ``"experiment"`` to form the table
            namespace.

    Returns:
        A ``(data_summary, metric_summary)`` tuple.
    """
    if isinstance(config, str):
        config = load_job_config(config)
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    roles = config.parties
    guest = roles.guest[0]
    host = roles.host[0]
    arbiter = roles.arbiter[0]

    table_namespace = f"experiment{namespace}"
    guest_train_data = {"name": param['data_guest_train'],
                        "namespace": table_namespace}
    guest_validate_data = {"name": param['data_guest_val'],
                           "namespace": table_namespace}
    host_train_data = {"name": param['data_host_train'],
                       "namespace": table_namespace}
    host_validate_data = {"name": param['data_host_val'],
                          "namespace": table_namespace}

    pipeline = PipeLine().set_initiator(
        role='guest', party_id=guest).set_roles(guest=guest, host=host,
                                                arbiter=arbiter)

    dataio_0, dataio_1 = DataIO(name="dataio_0"), DataIO(name='dataio_1')
    reader_0, reader_1 = Reader(name="reader_0"), Reader(name='reader_1')

    # Train branch first, then validate branch; every party carries labels.
    input_plan = (
        (reader_0, dataio_0, guest_train_data, host_train_data),
        (reader_1, dataio_1, guest_validate_data, host_validate_data),
    )
    for reader, dataio, guest_table, host_table in input_plan:
        reader.get_party_instance(
            role='guest', party_id=guest).component_param(table=guest_table)
        reader.get_party_instance(
            role='host', party_id=host).component_param(table=host_table)
        dataio.get_party_instance(
            role='guest', party_id=guest).component_param(
                with_label=True, output_format="dense")
        dataio.get_party_instance(
            role='host', party_id=host).component_param(
                with_label=True, output_format="dense")

    homo_secureboost_0 = HomoSecureBoost(
        name="homo_secureboost_0",
        num_trees=param['tree_num'],
        task_type=param['task_type'],
        objective_param={"objective": param['loss_func']},
        tree_param={"max_depth": param['tree_depth']},
        validation_freqs=1,
        subsample_feature_rate=1,
        learning_rate=param['learning_rate'],
        bin_num=50)
    homo_secureboost_1 = HomoSecureBoost(name="homo_secureboost_1")

    evaluation_0 = Evaluation(name='evaluation_0',
                              eval_type=param['eval_type'])

    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(reader_1)
    pipeline.add_component(dataio_1,
                           data=Data(data=reader_1.output.data),
                           model=Model(dataio_0.output.model))
    pipeline.add_component(homo_secureboost_0,
                           data=Data(train_data=dataio_0.output.data,
                                     validate_data=dataio_1.output.data))
    pipeline.add_component(homo_secureboost_1,
                           data=Data(test_data=dataio_1.output.data),
                           model=Model(homo_secureboost_0.output.model))
    pipeline.add_component(evaluation_0,
                           data=Data(homo_secureboost_0.output.data))

    pipeline.compile()
    pipeline.fit()

    # Pull the output data of both boosting components for script metrics.
    train_output = pipeline.get_component(
        "homo_secureboost_0").get_output_data().get("data")
    validate_output = pipeline.get_component(
        "homo_secureboost_1").get_output_data().get("data")

    train_score = extract_data(train_output, "predict_result")
    train_label = extract_data(train_output, "label")
    validate_score = extract_data(validate_output, "predict_result")
    validate_label = extract_data(validate_output, "label")
    train_score_with_id = extract_data(train_output, "predict_result",
                                       keep_id=True)
    validate_score_with_id = extract_data(validate_output, "predict_result",
                                          keep_id=True)

    metric_summary = parse_summary_result(
        pipeline.get_component("evaluation_0").get_summary())

    if param['eval_type'] == "regression":
        train_description = regression_metric.Describe().compute(train_score)
        validate_description = regression_metric.Describe().compute(
            validate_score)
        metric_summary["script_metrics"] = {
            "sbt_train": train_description,
            "sbt_validate": validate_description,
        }
    elif param['eval_type'] == "binary":
        binary_metrics = {
            "score_diversity_ratio":
                classification_metric.Distribution.compute(
                    train_score_with_id, validate_score_with_id),
            "ks_2samp":
                classification_metric.KSTest.compute(train_score,
                                                     validate_score),
            "mAP_D_value":
                classification_metric.AveragePrecisionScore().compute(
                    train_score, validate_score, train_label,
                    validate_label),
        }
        metric_summary["distribution_metrics"] = {"homo_sbt": binary_metrics}
    elif param['eval_type'] == "multi":
        multi_metrics = {
            "score_diversity_ratio":
                classification_metric.Distribution.compute(
                    train_score_with_id, validate_score_with_id),
        }
        metric_summary["distribution_metrics"] = {"homo_sbt": multi_metrics}

    data_summary = {
        "train": {
            "guest": guest_train_data["name"],
            "host": host_train_data["name"],
        },
        "test": {
            "guest": guest_validate_data["name"],
            "host": host_validate_data["name"],
        },
    }
    return data_summary, metric_summary
def main(config="../../config.yaml", namespace=""):
    """Train homo SecureBoost on breast data, then run a predict pipeline.

    After fitting, ``data_transform_0`` and ``homo_secureboost_0`` are
    deployed and a second pipeline feeds ``reader_1`` into the deployed
    components for prediction.

    Args:
        config: path to a job config file (loaded with ``load_job_config``)
            or an already loaded config exposing ``parties``.
        namespace: suffix appended to ``"experiment"`` to form the table
            namespace.
    """
    if isinstance(config, str):
        config = load_job_config(config)

    roles = config.parties
    guest = roles.guest[0]
    host = roles.host[0]
    arbiter = roles.arbiter[0]

    table_namespace = f"experiment{namespace}"
    guest_train_data = {"name": "breast_homo_guest",
                        "namespace": table_namespace}
    guest_validate_data = {"name": "breast_homo_test",
                           "namespace": table_namespace}
    host_train_data = {"name": "breast_homo_host",
                       "namespace": table_namespace}
    host_validate_data = {"name": "breast_homo_test",
                          "namespace": table_namespace}

    pipeline = PipeLine().set_initiator(
        role='guest', party_id=guest).set_roles(guest=guest, host=host,
                                                arbiter=arbiter)

    data_transform_0, data_transform_1 = DataTransform(
        name="data_transform_0"), DataTransform(name='data_transform_1')
    reader_0, reader_1 = Reader(name="reader_0"), Reader(name='reader_1')

    # Train branch first, then validate branch; every party carries labels.
    input_plan = (
        (reader_0, data_transform_0, guest_train_data, host_train_data),
        (reader_1, data_transform_1, guest_validate_data,
         host_validate_data),
    )
    for reader, transform, guest_table, host_table in input_plan:
        reader.get_party_instance(
            role='guest', party_id=guest).component_param(table=guest_table)
        reader.get_party_instance(
            role='host', party_id=host).component_param(table=host_table)
        transform.get_party_instance(
            role='guest', party_id=guest).component_param(
                with_label=True, output_format="dense")
        transform.get_party_instance(
            role='host', party_id=host).component_param(
                with_label=True, output_format="dense")

    homo_secureboost_0 = HomoSecureBoost(
        name="homo_secureboost_0",
        num_trees=3,
        task_type='classification',
        objective_param={"objective": "cross_entropy"},
        tree_param={"max_depth": 3},
        validation_freqs=1)

    evaluation_0 = Evaluation(name='evaluation_0', eval_type='binary')

    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0,
                           data=Data(data=reader_0.output.data))
    pipeline.add_component(reader_1)
    pipeline.add_component(data_transform_1,
                           data=Data(data=reader_1.output.data),
                           model=Model(data_transform_0.output.model))
    pipeline.add_component(
        homo_secureboost_0,
        data=Data(train_data=data_transform_0.output.data,
                  validate_data=data_transform_1.output.data))
    pipeline.add_component(evaluation_0,
                           data=Data(homo_secureboost_0.output.data))

    pipeline.compile()
    pipeline.fit()

    # Prediction: deploy the needed components, then wire reader_1 into the
    # deployed data_transform_0 input on a fresh pipeline.
    pipeline.deploy_component([data_transform_0, homo_secureboost_0])

    predict_pipeline = PipeLine()
    predict_pipeline.add_component(reader_1)
    predict_input = {
        pipeline.data_transform_0.input.data: reader_1.output.data
    }
    predict_pipeline.add_component(
        pipeline, data=Data(predict_input=predict_input))
    predict_pipeline.predict()
def main(config="../../config.yaml", namespace=""):
    """Fit a hetero SecureBoost regression job with early stopping.

    Pipeline: two readers -> two DataIOs -> two Intersections ->
    HeteroSecureBoost (train + validate) -> regression Evaluation. The
    boosting component summary is printed after fitting.

    Args:
        config: path to a job config file (loaded with ``load_job_config``)
            or an already loaded config exposing ``parties``, ``backend``
            and ``work_mode``.
        namespace: suffix appended to ``"experiment"`` to form the table
            namespace.
    """
    if isinstance(config, str):
        config = load_job_config(config)

    roles = config.parties
    guest = roles.guest[0]
    host = roles.host[0]
    backend = config.backend
    work_mode = config.work_mode

    table_namespace = f"experiment{namespace}"
    guest_train_data = {"name": "student_hetero_guest",
                        "namespace": table_namespace}
    host_train_data = {"name": "student_hetero_host",
                       "namespace": table_namespace}
    guest_validate_data = {"name": "student_hetero_guest",
                           "namespace": table_namespace}
    host_validate_data = {"name": "student_hetero_host",
                          "namespace": table_namespace}

    pipeline = PipeLine().set_initiator(
        role="guest", party_id=guest).set_roles(guest=guest, host=host)

    # Readers: reader_0 feeds training data, reader_1 validation data.
    reader_0, reader_1 = Reader(name="reader_0"), Reader(name="reader_1")
    reader_plan = (
        (reader_0, guest_train_data, host_train_data),
        (reader_1, guest_validate_data, host_validate_data),
    )
    for reader, guest_table, host_table in reader_plan:
        reader.get_party_instance(
            role="guest", party_id=guest).algorithm_param(table=guest_table)
        reader.get_party_instance(
            role="host", party_id=host).algorithm_param(table=host_table)

    # DataIO: only the guest carries labels.
    dataio_0, dataio_1 = DataIO(name="dataio_0"), DataIO(name="dataio_1")
    for dataio in (dataio_0, dataio_1):
        dataio.get_party_instance(
            role="guest", party_id=guest).algorithm_param(
                with_label=True, output_format="dense")
        dataio.get_party_instance(
            role="host", party_id=host).algorithm_param(with_label=False)

    intersect_0 = Intersection(name="intersection_0")
    intersect_1 = Intersection(name="intersection_1")

    hetero_secure_boost_0 = HeteroSecureBoost(
        name="hetero_secure_boost_0",
        num_trees=5,
        task_type="regression",
        objective_param={"objective": "lse"},
        encrypt_param={"method": "iterativeAffine"},
        tree_param={"max_depth": 3},
        validation_freqs=1,
        early_stopping_rounds=1,
    )

    evaluation_0 = Evaluation(name="evaluation_0", eval_type="regression")

    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    # The validation DataIO reuses the training DataIO's model.
    pipeline.add_component(dataio_1,
                           data=Data(data=reader_1.output.data),
                           model=Model(dataio_0.output.model))
    pipeline.add_component(intersect_0,
                           data=Data(data=dataio_0.output.data))
    pipeline.add_component(intersect_1,
                           data=Data(data=dataio_1.output.data))
    pipeline.add_component(
        hetero_secure_boost_0,
        data=Data(train_data=intersect_0.output.data,
                  validate_data=intersect_1.output.data))
    pipeline.add_component(
        evaluation_0, data=Data(data=hetero_secure_boost_0.output.data))

    pipeline.compile()
    pipeline.fit(backend=backend, work_mode=work_mode)

    print("fitting hetero secureboost done, result:")
    print(pipeline.get_component("hetero_secure_boost_0").get_summary())
def main(config="../../config.yaml", namespace=""):
    """Fit hetero fast SecureBoost ('mix' mode), then run a predict pipeline.

    After fitting and printing the boosting summary, ``dataio_0``,
    ``intersection_0`` and the boosting component are deployed. A second
    pipeline then feeds ``reader_0`` into them for prediction, using the same
    job parameters.

    Args:
        config: path to a job config file (loaded with ``load_job_config``)
            or an already loaded config exposing ``parties``, ``backend``
            and ``work_mode``.
        namespace: suffix appended to ``"experiment"`` to form the table
            namespace.
    """
    if isinstance(config, str):
        config = load_job_config(config)

    roles = config.parties
    guest = roles.guest[0]
    host = roles.host[0]
    backend = config.backend
    work_mode = config.work_mode

    table_namespace = f"experiment{namespace}"
    guest_train_data = {"name": "breast_hetero_guest",
                        "namespace": table_namespace}
    host_train_data = {"name": "breast_hetero_host",
                       "namespace": table_namespace}
    guest_validate_data = {"name": "breast_hetero_guest",
                           "namespace": table_namespace}
    host_validate_data = {"name": "breast_hetero_host",
                          "namespace": table_namespace}

    pipeline = PipeLine().set_initiator(
        role="guest", party_id=guest).set_roles(guest=guest, host=host)

    # Readers: reader_0 feeds training data, reader_1 validation data.
    reader_0, reader_1 = Reader(name="reader_0"), Reader(name="reader_1")
    reader_plan = (
        (reader_0, guest_train_data, host_train_data),
        (reader_1, guest_validate_data, host_validate_data),
    )
    for reader, guest_table, host_table in reader_plan:
        reader.get_party_instance(
            role="guest", party_id=guest).component_param(table=guest_table)
        reader.get_party_instance(
            role="host", party_id=host).component_param(table=host_table)

    # DataIO: only the guest carries labels.
    dataio_0, dataio_1 = DataIO(name="dataio_0"), DataIO(name="dataio_1")
    for dataio in (dataio_0, dataio_1):
        dataio.get_party_instance(
            role="guest", party_id=guest).component_param(
                with_label=True, output_format="dense")
        dataio.get_party_instance(
            role="host", party_id=host).component_param(with_label=False)

    intersect_0 = Intersection(name="intersection_0")
    intersect_1 = Intersection(name="intersection_1")

    hetero_fast_secure_boost_0 = HeteroFastSecureBoost(
        name="hetero_fast_secure_boost_0",
        num_trees=4,
        tree_num_per_party=1,
        task_type="classification",
        objective_param={"objective": "cross_entropy"},
        encrypt_param={"method": "iterativeAffine"},
        tree_param={"max_depth": 3},
        validation_freqs=1,
        work_mode="mix",
    )

    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")

    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    # The validation DataIO reuses the training DataIO's model.
    pipeline.add_component(dataio_1,
                           data=Data(data=reader_1.output.data),
                           model=Model(dataio_0.output.model))
    pipeline.add_component(intersect_0,
                           data=Data(data=dataio_0.output.data))
    pipeline.add_component(intersect_1,
                           data=Data(data=dataio_1.output.data))
    pipeline.add_component(
        hetero_fast_secure_boost_0,
        data=Data(train_data=intersect_0.output.data,
                  validate_data=intersect_1.output.data))
    pipeline.add_component(
        evaluation_0,
        data=Data(data=hetero_fast_secure_boost_0.output.data))

    pipeline.compile()
    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)

    print("fitting hetero fast secureboost done, result:")
    print(pipeline.get_component("hetero_fast_secure_boost_0").get_summary())

    # Prediction: deploy the needed components, then wire reader_0 into the
    # deployed dataio_0 input on a fresh pipeline.
    pipeline.deploy_component(
        [dataio_0, intersect_0, hetero_fast_secure_boost_0])

    predict_pipeline = PipeLine()
    predict_pipeline.add_component(reader_0)
    predict_input = {pipeline.dataio_0.input.data: reader_0.output.data}
    predict_pipeline.add_component(
        pipeline, data=Data(predict_input=predict_input))
    predict_pipeline.predict(job_parameters)
def main(config="../../config.yaml", param="./xgb_config_binary.yaml",
         namespace=""):
    """Train a hetero fast SecureBoost model and return its evaluation summary.

    Args:
        config: Path to a job config file (loaded with ``load_job_config``),
            or an already-loaded config object exposing ``parties``,
            ``backend`` and ``work_mode``.
        param: Accepted for interface compatibility; this function does not
            read it.
        namespace: Suffix appended to ``"experiment"`` to build the namespace
            of every input table.

    Returns:
        A 2-tuple: an empty dict and the summary of ``evaluation_0``.
    """
    # Only a string is treated as a path; any other value is used directly.
    job_conf = load_job_config(config) if isinstance(config, str) else config
    party_info = job_conf.parties
    guest_id = party_info.guest[0]
    host_id = party_info.host[0]
    backend = job_conf.backend
    work_mode = job_conf.work_mode

    # Input tables; the validation tables reuse the training table names.
    table_namespace = f"experiment{namespace}"
    guest_train_table = {
        "name": "breast_hetero_guest",
        "namespace": table_namespace,
    }
    host_train_table = {
        "name": "breast_hetero_host",
        "namespace": table_namespace,
    }
    guest_validate_table = {
        "name": "breast_hetero_guest",
        "namespace": table_namespace,
    }
    host_validate_table = {
        "name": "breast_hetero_host",
        "namespace": table_namespace,
    }

    # Pipeline initiated by the guest, with one guest and one host.
    sbt_pipeline = PipeLine().set_initiator(role="guest", party_id=guest_id)
    sbt_pipeline = sbt_pipeline.set_roles(guest=guest_id, host=host_id)

    # reader_0 feeds training data, reader_1 feeds validation data.
    reader_0 = Reader(name="reader_0")
    reader_1 = Reader(name="reader_1")
    reader_tables = (
        (reader_0, guest_train_table, host_train_table),
        (reader_1, guest_validate_table, host_validate_table),
    )
    for reader, guest_table, host_table in reader_tables:
        reader.get_party_instance(
            role="guest", party_id=guest_id).algorithm_param(table=guest_table)
        reader.get_party_instance(
            role="host", party_id=host_id).algorithm_param(table=host_table)

    # Guest data is configured with labels, host data without.
    dataio_0 = DataIO(name="dataio_0")
    dataio_1 = DataIO(name="dataio_1")
    for dataio in (dataio_0, dataio_1):
        dataio.get_party_instance(
            role="guest", party_id=guest_id).algorithm_param(
                with_label=True, output_format="dense")
        dataio.get_party_instance(
            role="host", party_id=host_id).algorithm_param(with_label=False)

    # One intersection component per data branch (train / validate).
    intersect_0 = Intersection(name="intersection_0")
    intersect_1 = Intersection(name="intersection_1")

    # Fast SecureBoost model.
    booster_settings = {
        "name": "hetero_fast_sbt_0",
        "num_trees": 10,
        "task_type": "classification",
        "objective_param": {"objective": "cross_entropy"},
        "encrypt_param": {"method": "iterativeAffine"},
        "tree_param": {"max_depth": 3},
        "validation_freqs": 1,
        "learning_rate": 0.1,
        "tree_num_per_party": 1,
        "work_mode": "mix",
    }
    hetero_fast_sbt = HeteroFastSecureBoost(**booster_settings)

    # Binary evaluation of the booster output.
    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")

    # Wire the components together in execution order.
    sbt_pipeline.add_component(reader_0)
    sbt_pipeline.add_component(reader_1)
    sbt_pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    # dataio_1 takes dataio_0's model as input in addition to its own data.
    sbt_pipeline.add_component(
        dataio_1,
        data=Data(data=reader_1.output.data),
        model=Model(dataio_0.output.model))
    sbt_pipeline.add_component(
        intersect_0, data=Data(data=dataio_0.output.data))
    sbt_pipeline.add_component(
        intersect_1, data=Data(data=dataio_1.output.data))
    sbt_pipeline.add_component(
        hetero_fast_sbt,
        data=Data(train_data=intersect_0.output.data,
                  validate_data=intersect_1.output.data))
    sbt_pipeline.add_component(
        evaluation_0, data=Data(data=hetero_fast_sbt.output.data))

    sbt_pipeline.compile()
    sbt_pipeline.fit(backend=backend, work_mode=work_mode)

    evaluation_summary = sbt_pipeline.get_component(
        "evaluation_0").get_summary()
    return {}, evaluation_summary
#
#  Copyright 2019 The FATE Authors. All Rights Reserved.
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

from pipeline.component.evaluation import Evaluation

# Build a stand-alone Evaluation component and print the object exposed as
# its data output.
a = Evaluation(name="evaluation_0")
_data_output = a.output.data
print(_data_output)
def main(config="../../config.yaml", namespace=""):
    """Train a homo logistic regression model and print its summaries.

    Args:
        config: Path to a job config file (loaded with ``load_job_config``),
            or an already-loaded config object exposing ``parties``,
            ``backend`` and ``work_mode``.
        namespace: Suffix appended to ``"experiment"`` to build the namespace
            of the input tables.
    """
    # Only a string is treated as a path; any other value is used directly.
    job_conf = load_job_config(config) if isinstance(config, str) else config
    party_info = job_conf.parties
    guest_id = party_info.guest[0]
    # The complete host entry from the config is used, not just its first
    # element.
    host_ids = party_info.host
    arbiter_id = party_info.arbiter[0]
    backend = job_conf.backend
    work_mode = job_conf.work_mode

    table_namespace = f"experiment{namespace}"
    guest_train_table = {
        "name": "breast_homo_guest",
        "namespace": table_namespace,
    }
    host_train_table = {
        "name": "breast_homo_host",
        "namespace": table_namespace,
    }

    # Build the pipeline: the guest initiates; guest, host and arbiter take
    # part.
    lr_pipeline = PipeLine()
    lr_pipeline.set_initiator(role='guest', party_id=guest_id)
    lr_pipeline.set_roles(guest=guest_id, host=host_ids, arbiter=arbiter_id)

    # Reader: each side reads its own training table.
    reader_0 = Reader(name="reader_0")
    guest_reader = reader_0.get_party_instance(role='guest', party_id=guest_id)
    guest_reader.component_param(table=guest_train_table)
    host_reader = reader_0.get_party_instance(role='host', party_id=host_ids)
    host_reader.component_param(table=host_train_table)

    # DataIO shared by all parties: labelled, dense output.
    dataio_0 = DataIO(name="dataio_0", with_label=True, output_format="dense")

    # Feature scaling with the component's default parameters.
    scale_0 = FeatureScale(name='scale_0')

    # Homo LR hyper-parameters.
    lr_settings = {
        "penalty": "L2",
        "optimizer": "sgd",
        "tol": 1e-05,
        "alpha": 0.01,
        "max_iter": 30,
        "early_stop": "diff",
        "batch_size": -1,
        "learning_rate": 0.15,
        "decay": 1,
        "decay_sqrt": True,
        "init_param": {"init_method": "zeros"},
        "encrypt_param": {"method": None},
        "cv_param": {
            "n_splits": 4,
            "shuffle": True,
            "random_seed": 33,
            "need_cv": False,
        },
    }
    homo_lr_0 = HomoLR(name='homo_lr_0', **lr_settings)

    # Add components in execution order:
    # reader -> dataio -> scale -> homo LR.
    lr_pipeline.add_component(reader_0)
    lr_pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    lr_pipeline.add_component(scale_0, data=Data(data=dataio_0.output.data))
    lr_pipeline.add_component(
        homo_lr_0, data=Data(train_data=scale_0.output.data))

    # Binary evaluation on the LR output; the host side is set to
    # need_run=False.
    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")
    host_evaluation = evaluation_0.get_party_instance(
        role='host', party_id=host_ids)
    host_evaluation.component_param(need_run=False)
    lr_pipeline.add_component(
        evaluation_0, data=Data(data=homo_lr_0.output.data))

    # Compile once every component has been added, then fit.
    lr_pipeline.compile()
    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    lr_pipeline.fit(job_parameters)

    # Print the model summary, then the evaluation summary, as indented JSON.
    for component_name in ("homo_lr_0", "evaluation_0"):
        summary = lr_pipeline.get_component(component_name).get_summary()
        print(json.dumps(summary, indent=4, ensure_ascii=False))
def main(config="../../config.yaml", namespace=""):
    """Train hetero SecureBoost on the vehicle data and apply SBTTransformer.

    Args:
        config: Path to a job config file (loaded with ``load_job_config``),
            or an already-loaded config object exposing ``parties``.
        namespace: Suffix appended to ``"experiment"`` to build the namespace
            of every input table.
    """
    # Only a string is treated as a path; any other value is used directly.
    job_conf = load_job_config(config) if isinstance(config, str) else config
    party_info = job_conf.parties
    guest_id = party_info.guest[0]
    host_id = party_info.host[0]

    # Input tables; the validation tables reuse the training table names.
    table_namespace = f"experiment{namespace}"
    guest_train_table = {
        "name": "vehicle_scale_hetero_guest",
        "namespace": table_namespace,
    }
    host_train_table = {
        "name": "vehicle_scale_hetero_host",
        "namespace": table_namespace,
    }
    guest_validate_table = {
        "name": "vehicle_scale_hetero_guest",
        "namespace": table_namespace,
    }
    host_validate_table = {
        "name": "vehicle_scale_hetero_host",
        "namespace": table_namespace,
    }

    # Pipeline initiated by the guest, with one guest and one host.
    sbt_pipeline = PipeLine().set_initiator(role="guest", party_id=guest_id)
    sbt_pipeline = sbt_pipeline.set_roles(guest=guest_id, host=host_id)

    # reader_0 feeds training data, reader_1 feeds validation data.
    reader_0 = Reader(name="reader_0")
    reader_1 = Reader(name="reader_1")
    reader_tables = (
        (reader_0, guest_train_table, host_train_table),
        (reader_1, guest_validate_table, host_validate_table),
    )
    for reader, guest_table, host_table in reader_tables:
        reader.get_party_instance(
            role="guest", party_id=guest_id).component_param(table=guest_table)
        reader.get_party_instance(
            role="host", party_id=host_id).component_param(table=host_table)

    # Guest data is configured with labels, host data without.
    data_transform_0 = DataTransform(name="data_transform_0")
    data_transform_1 = DataTransform(name="data_transform_1")
    for transform in (data_transform_0, data_transform_1):
        transform.get_party_instance(
            role="guest", party_id=guest_id).component_param(
                with_label=True, output_format="dense")
        transform.get_party_instance(
            role="host", party_id=host_id).component_param(with_label=False)

    # One intersection component per data branch (train / validate).
    intersect_0 = Intersection(name="intersection_0")
    intersect_1 = Intersection(name="intersection_1")

    # SecureBoost model.
    booster_settings = {
        "name": "hetero_secure_boost_0",
        "num_trees": 3,
        "task_type": "classification",
        "objective_param": {"objective": "cross_entropy"},
        "encrypt_param": {"method": "paillier"},
        "tree_param": {"max_depth": 3},
        "validation_freqs": 1,
    }
    hetero_secure_boost_0 = HeteroSecureBoost(**booster_settings)

    # NOTE(review): the two evaluation components and the two local baselines
    # below are configured but never passed to add_component in this
    # function - confirm whether that is intended.
    evaluation_0 = Evaluation(name="evaluation_0", eval_type="multi")
    evaluation_1 = Evaluation(name="evaluation_1", eval_type="multi")

    # Transformer that consumes the trained boosting model.
    transformer_0 = SBTTransformer(name='sbt_transformer_0', dense_format=True)

    # Two identically configured logistic-regression baselines; each is set
    # to run on the guest and not on the host.
    local_baselines = []
    for idx in range(2):
        baseline = LocalBaseline(
            name="local_baseline_{}".format(idx),
            model_name="LogisticRegression",
            model_opts={
                "penalty": "l2",
                "tol": 0.0001,
                "C": 1.0,
                "fit_intercept": True,
                "solver": "lbfgs",
                "max_iter": 50,
            })
        baseline.get_party_instance(
            role='guest', party_id=guest_id).component_param(need_run=True)
        baseline.get_party_instance(
            role='host', party_id=host_id).component_param(need_run=False)
        local_baselines.append(baseline)

    evaluation_1.get_party_instance(
        role='host', party_id=host_id).component_param(need_run=False)

    # Wire the components together in execution order.
    sbt_pipeline.add_component(reader_0)
    sbt_pipeline.add_component(reader_1)
    sbt_pipeline.add_component(
        data_transform_0, data=Data(data=reader_0.output.data))
    # data_transform_1 takes data_transform_0's model as input in addition to
    # its own data.
    sbt_pipeline.add_component(
        data_transform_1,
        data=Data(data=reader_1.output.data),
        model=Model(data_transform_0.output.model))
    sbt_pipeline.add_component(
        intersect_0, data=Data(data=data_transform_0.output.data))
    sbt_pipeline.add_component(
        intersect_1, data=Data(data=data_transform_1.output.data))
    sbt_pipeline.add_component(
        hetero_secure_boost_0,
        data=Data(train_data=intersect_0.output.data,
                  validate_data=intersect_1.output.data))
    # The transformer receives the intersected training data plus the
    # boosting model, passed as its isometric model.
    sbt_pipeline.add_component(
        transformer_0,
        data=Data(data=intersect_0.output.data),
        model=Model(isometric_model=hetero_secure_boost_0.output.model))

    sbt_pipeline.compile()
    sbt_pipeline.fit()