def main(config="../../config.yaml", param="./xgb_config_binary.yaml", namespace=""):
    """Run a hetero SecureBoost train/predict job and collect benchmark summaries.

    Args:
        config: Job config object, or a path that is loaded with
            ``load_job_config``.
        param: Parameter mapping, or a path that is loaded with
            ``JobConfig.load_from_file``.
        namespace: Suffix appended to ``"experiment"`` to form the table
            namespace.

    Returns:
        A ``(data_summary, metric_summary)`` tuple. ``data_summary`` names the
        guest/host tables used for training and for testing.
        ``metric_summary`` is the parsed ``evaluation_0`` summary, extended
        with extra metrics when ``param['eval_type']`` is ``"regression"``,
        ``"binary"`` or ``"multi"``.
    """
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]

    def _table(name):
        # All tables share the "experiment<namespace>" namespace.
        return {"name": name, "namespace": f"experiment{namespace}"}

    # data sets
    guest_train_data = _table(param['data_guest_train'])
    host_train_data = _table(param['data_host_train'])
    guest_validate_data = _table(param['data_guest_val'])
    host_validate_data = _table(param['data_host_val'])

    # init pipeline
    pipeline = PipeLine().set_initiator(role="guest", party_id=guest).set_roles(
        guest=guest, host=host)

    # set data reader and data-io: reader_0 reads the training tables,
    # reader_1 the validate tables
    reader_0, reader_1 = Reader(name="reader_0"), Reader(name="reader_1")
    for reader, guest_table, host_table in (
            (reader_0, guest_train_data, host_train_data),
            (reader_1, guest_validate_data, host_validate_data)):
        reader.get_party_instance(
            role="guest", party_id=guest).component_param(table=guest_table)
        reader.get_party_instance(
            role="host", party_id=host).component_param(table=host_table)

    dataio_0, dataio_1 = DataIO(name="dataio_0"), DataIO(name="dataio_1")
    for dataio in (dataio_0, dataio_1):
        # only the guest side is configured with labels
        dataio.get_party_instance(role="guest", party_id=guest).component_param(
            with_label=True, output_format="dense")
        dataio.get_party_instance(
            role="host", party_id=host).component_param(with_label=False)

    # data intersect component
    intersect_0 = Intersection(name="intersection_0")
    intersect_1 = Intersection(name="intersection_1")

    # secure boost component: *_0 is trained, *_1 reuses its model for prediction
    hetero_secure_boost_0 = HeteroSecureBoost(
        name="hetero_secure_boost_0",
        num_trees=param['tree_num'],
        task_type=param['task_type'],
        objective_param={"objective": param['loss_func']},
        encrypt_param={"method": "Paillier"},
        tree_param={"max_depth": param['tree_depth']},
        validation_freqs=1,
        learning_rate=param['learning_rate'])
    hetero_secure_boost_1 = HeteroSecureBoost(name="hetero_secure_boost_1")

    # evaluation component
    evaluation_0 = Evaluation(name="evaluation_0", eval_type=param['eval_type'])

    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(dataio_1,
                           data=Data(data=reader_1.output.data),
                           model=Model(dataio_0.output.model))
    pipeline.add_component(intersect_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(intersect_1, data=Data(data=dataio_1.output.data))
    pipeline.add_component(hetero_secure_boost_0,
                           data=Data(train_data=intersect_0.output.data,
                                     validate_data=intersect_1.output.data))
    pipeline.add_component(hetero_secure_boost_1,
                           data=Data(test_data=intersect_1.output.data),
                           model=Model(hetero_secure_boost_0.output.model))
    pipeline.add_component(evaluation_0,
                           data=Data(data=hetero_secure_boost_0.output.data))

    pipeline.compile()
    pipeline.fit()

    sbt_0_data = pipeline.get_component(
        "hetero_secure_boost_0").get_output_data().get("data")
    sbt_1_data = pipeline.get_component(
        "hetero_secure_boost_1").get_output_data().get("data")
    sbt_0_score = extract_data(sbt_0_data, "predict_result")
    sbt_0_label = extract_data(sbt_0_data, "label")
    sbt_1_score = extract_data(sbt_1_data, "predict_result")
    sbt_1_label = extract_data(sbt_1_data, "label")
    sbt_0_score_label = extract_data(sbt_0_data, "predict_result", keep_id=True)
    sbt_1_score_label = extract_data(sbt_1_data, "predict_result", keep_id=True)

    metric_summary = parse_summary_result(
        pipeline.get_component("evaluation_0").get_summary())

    if param['eval_type'] == "regression":
        desc_sbt_0 = regression_metric.Describe().compute(sbt_0_score)
        desc_sbt_1 = regression_metric.Describe().compute(sbt_1_score)
        metric_summary["script_metrics"] = {
            "hetero_sbt_train": desc_sbt_0,
            "hetero_sbt_validate": desc_sbt_1
        }
    elif param['eval_type'] == "binary":
        metric_sbt = {
            "score_diversity_ratio":
            classification_metric.Distribution.compute(sbt_0_score_label,
                                                       sbt_1_score_label),
            "ks_2samp":
            classification_metric.KSTest.compute(sbt_0_score, sbt_1_score),
            "mAP_D_value":
            classification_metric.AveragePrecisionScore().compute(
                sbt_0_score, sbt_1_score, sbt_0_label, sbt_1_label)
        }
        metric_summary["distribution_metrics"] = {"hetero_sbt": metric_sbt}
    elif param['eval_type'] == "multi":
        metric_sbt = {
            "score_diversity_ratio":
            classification_metric.Distribution.compute(sbt_0_score_label,
                                                       sbt_1_score_label)
        }
        metric_summary["distribution_metrics"] = {"hetero_sbt": metric_sbt}

    # hetero_secure_boost_1 predicts on intersection_1, which is fed by reader_1,
    # so the "test" entry must name the validate tables rather than the
    # training tables.
    data_summary = {
        "train": {
            "guest": guest_train_data["name"],
            "host": host_train_data["name"]
        },
        "test": {
            "guest": guest_validate_data["name"],
            "host": host_validate_data["name"]
        }
    }
    return data_summary, metric_summary
def main(config="../../config.yaml", namespace=""):
    """Fit a hetero fast SecureBoost job (``work_mode='layered'``) on breast data.

    Args:
        config: Job config object, or a path that is loaded with
            ``load_job_config``.
        namespace: Suffix appended to ``"experiment"`` to form the table
            namespace.

    The summary of ``hetero_fast_secure_boost_0`` is printed after fitting;
    nothing is returned.
    """
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    party_ids = config.parties
    guest = party_ids.guest[0]
    host = party_ids.host[0]
    backend = config.backend
    work_mode = config.work_mode

    def _table(name):
        # All tables share the "experiment<namespace>" namespace.
        return {"name": name, "namespace": f"experiment{namespace}"}

    # data sets: the validate tables reuse the training table names
    guest_train_data = _table("breast_hetero_guest")
    host_train_data = _table("breast_hetero_host")
    guest_validate_data = _table("breast_hetero_guest")
    host_validate_data = _table("breast_hetero_host")

    # init pipeline
    initiated = PipeLine().set_initiator(role="guest", party_id=guest)
    pipeline = initiated.set_roles(guest=guest, host=host)

    # set data reader and data-io
    reader_0 = Reader(name="reader_0")
    reader_1 = Reader(name="reader_1")
    reader_tables = (
        (reader_0, guest_train_data, host_train_data),
        (reader_1, guest_validate_data, host_validate_data),
    )
    for reader, guest_table, host_table in reader_tables:
        guest_side = reader.get_party_instance(role="guest", party_id=guest)
        guest_side.algorithm_param(table=guest_table)
        host_side = reader.get_party_instance(role="host", party_id=host)
        host_side.algorithm_param(table=host_table)

    dataio_0 = DataIO(name="dataio_0")
    dataio_1 = DataIO(name="dataio_1")
    for dataio in (dataio_0, dataio_1):
        # only the guest side is configured with labels
        guest_side = dataio.get_party_instance(role="guest", party_id=guest)
        guest_side.algorithm_param(with_label=True, output_format="dense")
        host_side = dataio.get_party_instance(role="host", party_id=host)
        host_side.algorithm_param(with_label=False)

    # data intersect component
    intersect_0 = Intersection(name="intersection_0")
    intersect_1 = Intersection(name="intersection_1")

    # secure boost component
    boost_settings = dict(
        num_trees=3,
        task_type='classification',
        objective_param={"objective": "cross_entropy"},
        encrypt_param={"method": "iterativeAffine"},
        guest_depth=1,
        host_depth=2,
        tree_param={"max_depth": 3},
        validation_freqs=1,
        work_mode='layered',
    )
    hetero_fast_secure_boost_0 = HeteroFastSecureBoost(
        name="hetero_fast_secure_boost_0", **boost_settings)

    # evaluation component
    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")

    # wire the components in execution order
    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(
        dataio_1,
        data=Data(data=reader_1.output.data),
        model=Model(dataio_0.output.model),
    )
    pipeline.add_component(intersect_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(intersect_1, data=Data(data=dataio_1.output.data))
    pipeline.add_component(
        hetero_fast_secure_boost_0,
        data=Data(train_data=intersect_0.output.data,
                  validate_data=intersect_1.output.data),
    )
    pipeline.add_component(
        evaluation_0, data=Data(data=hetero_fast_secure_boost_0.output.data))

    pipeline.compile()
    pipeline.fit(backend=backend, work_mode=work_mode)

    print("fitting hetero secureboost done, result:")
    boost_component = pipeline.get_component("hetero_fast_secure_boost_0")
    print(boost_component.get_summary())
def main(config="../../config.yaml", namespace=""):
    """Fit a hetero SecureBoost job with multi-class evaluation on vehicle data.

    Args:
        config: Job config object, or a path that is loaded with
            ``load_job_config``.
        namespace: Suffix appended to ``"experiment"`` to form the table
            namespace.

    The summary of ``hetero_secure_boost_0`` is printed after fitting;
    nothing is returned.
    """
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    party_ids = config.parties
    guest = party_ids.guest[0]
    host = party_ids.host[0]

    def _table(name):
        # All tables share the "experiment<namespace>" namespace.
        return {"name": name, "namespace": f"experiment{namespace}"}

    # data sets: the validate tables reuse the training table names
    guest_train_data = _table("vehicle_scale_hetero_guest")
    host_train_data = _table("vehicle_scale_hetero_host")
    guest_validate_data = _table("vehicle_scale_hetero_guest")
    host_validate_data = _table("vehicle_scale_hetero_host")

    # init pipeline
    initiated = PipeLine().set_initiator(role="guest", party_id=guest)
    pipeline = initiated.set_roles(guest=guest, host=host)

    # set data reader and data transform
    reader_0 = Reader(name="reader_0")
    reader_1 = Reader(name="reader_1")
    reader_tables = (
        (reader_0, guest_train_data, host_train_data),
        (reader_1, guest_validate_data, host_validate_data),
    )
    for reader, guest_table, host_table in reader_tables:
        guest_side = reader.get_party_instance(role="guest", party_id=guest)
        guest_side.component_param(table=guest_table)
        host_side = reader.get_party_instance(role="host", party_id=host)
        host_side.component_param(table=host_table)

    data_transform_0 = DataTransform(name="data_transform_0")
    data_transform_1 = DataTransform(name="data_transform_1")
    for transform in (data_transform_0, data_transform_1):
        # only the guest side is configured with labels
        guest_side = transform.get_party_instance(role="guest", party_id=guest)
        guest_side.component_param(with_label=True, output_format="dense")
        host_side = transform.get_party_instance(role="host", party_id=host)
        host_side.component_param(with_label=False)

    # data intersect component
    intersect_0 = Intersection(name="intersection_0")
    intersect_1 = Intersection(name="intersection_1")

    # secure boost component
    boost_settings = dict(
        num_trees=3,
        task_type="classification",
        objective_param={"objective": "cross_entropy"},
        encrypt_param={"method": "Paillier"},
        tree_param={"max_depth": 3},
        validation_freqs=1,
    )
    hetero_secure_boost_0 = HeteroSecureBoost(name="hetero_secure_boost_0",
                                              **boost_settings)

    # evaluation component
    evaluation_0 = Evaluation(name="evaluation_0", eval_type="multi")

    # wire the components in execution order
    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(data_transform_0,
                           data=Data(data=reader_0.output.data))
    pipeline.add_component(
        data_transform_1,
        data=Data(data=reader_1.output.data),
        model=Model(data_transform_0.output.model),
    )
    pipeline.add_component(intersect_0,
                           data=Data(data=data_transform_0.output.data))
    pipeline.add_component(intersect_1,
                           data=Data(data=data_transform_1.output.data))
    pipeline.add_component(
        hetero_secure_boost_0,
        data=Data(train_data=intersect_0.output.data,
                  validate_data=intersect_1.output.data),
    )
    pipeline.add_component(evaluation_0,
                           data=Data(data=hetero_secure_boost_0.output.data))

    pipeline.compile()
    pipeline.fit()

    print("fitting hetero secureboost done, result:")
    boost_component = pipeline.get_component("hetero_secure_boost_0")
    print(boost_component.get_summary())
def main(config="../../config.yaml", param="./lr_config.yaml", namespace=""):
    """Run a hetero logistic regression benchmark job.

    Args:
        config: Job config object, or a path that is loaded with
            ``load_job_config``.
        param: Parameter dict, or a path that is loaded with
            ``JobConfig.load_from_file``. The file name at the end of its
            ``"data_guest"`` entry selects the guest/host tables.
        namespace: Suffix appended to ``"experiment"`` to form the table
            namespace.

    Returns:
        A ``(data_summary, result_summary)`` tuple: the table names used for
        train/test, and the parsed ``evaluation_0`` summary extended with a
        ``"distribution_metrics"`` entry.

    Raises:
        ValueError: If the data set file name is not one of the known ones.
    """
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    party_ids = config.parties
    guest = party_ids.guest[0]
    host = party_ids.host[0]
    arbiter = party_ids.arbiter[0]
    backend = config.backend
    work_mode = config.work_mode

    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    assert isinstance(param, dict)

    # map the guest csv file name onto the (guest, host) table names
    known_tables = {
        "default_credit_hetero_guest.csv":
        ('default_credit_hetero_guest', 'default_credit_hetero_host'),
        'breast_hetero_guest.csv':
        ('breast_hetero_guest', 'breast_hetero_host'),
        'give_credit_hetero_guest.csv':
        ('give_credit_hetero_guest', 'give_credit_hetero_host'),
        'epsilon_5k_hetero_guest.csv':
        ('epsilon_5k_hetero_guest', 'epsilon_5k_hetero_host'),
    }
    data_set = param.get("data_guest").split('/')[-1]
    if data_set not in known_tables:
        raise ValueError(f"Cannot recognized data_set: {data_set}")
    guest_data_table, host_data_table = known_tables[data_set]

    table_namespace = f"experiment{namespace}"
    guest_train_data = {"name": guest_data_table, "namespace": table_namespace}
    host_train_data = {"name": host_data_table, "namespace": table_namespace}

    # initialize pipeline, then set the job initiator and the participants
    pipeline = PipeLine()
    pipeline.set_initiator(role='guest', party_id=guest)
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # Reader component: one table per party
    reader_0 = Reader(name="reader_0")
    reader_guest = reader_0.get_party_instance(role='guest', party_id=guest)
    reader_guest.component_param(table=guest_train_data)
    reader_host = reader_0.get_party_instance(role='host', party_id=host)
    reader_host.component_param(table=host_train_data)

    # DataIO component (component numbering starts at 0); only the guest side
    # is configured with labels
    dataio_0 = DataIO(name="dataio_0")
    dataio_guest = dataio_0.get_party_instance(role='guest', party_id=guest)
    dataio_guest.component_param(with_label=True, output_format="dense")
    dataio_host = dataio_0.get_party_instance(role='host', party_id=host)
    dataio_host.component_param(with_label=False)

    # Intersection component
    intersection_0 = Intersection(name="intersection_0")

    # fixed settings first, followed by the values taken from ``param``
    lr_param = {
        "validation_freqs": None,
        "early_stopping_rounds": None,
        "penalty": param["penalty"],
        "max_iter": param["max_iter"],
        "alpha": param["alpha"],
        "learning_rate": param["learning_rate"],
        "optimizer": param["optimizer"],
        "batch_size": param["batch_size"],
        "early_stop": "diff",
        "tol": 1e-5,
        "floating_point_precision": param.get("floating_point_precision"),
        "init_param": {
            "init_method": param.get("init_method", 'random_uniform'),
            "random_seed": param.get("random_seed", 103)
        }
    }
    print(f"lr_param: {lr_param}, data_set: {data_set}")

    # hetero_lr_0 is trained; hetero_lr_1 reuses its model for prediction
    hetero_lr_0 = HeteroLR(name='hetero_lr_0', **lr_param)
    hetero_lr_1 = HeteroLR(name='hetero_lr_1')

    evaluation_0 = Evaluation(name='evaluation_0', eval_type="binary")

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0,
                           data=Data(data=dataio_0.output.data))
    pipeline.add_component(
        hetero_lr_0, data=Data(train_data=intersection_0.output.data))
    pipeline.add_component(
        hetero_lr_1,
        data=Data(test_data=intersection_0.output.data),
        model=Model(hetero_lr_0.output.model),
    )
    pipeline.add_component(evaluation_0,
                           data=Data(data=hetero_lr_0.output.data))

    # compile pipeline once finished adding modules, this step will form conf
    # and dsl files for running job
    pipeline.compile()

    # fit model
    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)

    # collect scores and labels of the train and predict components
    lr_0_data = pipeline.get_component("hetero_lr_0").get_output_data().get("data")
    lr_1_data = pipeline.get_component("hetero_lr_1").get_output_data().get("data")
    lr_0_score = extract_data(lr_0_data, "predict_result")
    lr_0_label = extract_data(lr_0_data, "label")
    lr_1_score = extract_data(lr_1_data, "predict_result")
    lr_1_label = extract_data(lr_1_data, "label")
    lr_0_score_label = extract_data(lr_0_data, "predict_result", keep_id=True)
    lr_1_score_label = extract_data(lr_1_data, "predict_result", keep_id=True)

    evaluation_summary = pipeline.get_component("evaluation_0").get_summary()
    result_summary = parse_summary_result(evaluation_summary)

    metric_lr = {
        "score_diversity_ratio":
        classification_metric.Distribution.compute(lr_0_score_label,
                                                   lr_1_score_label),
        "ks_2samp":
        classification_metric.KSTest.compute(lr_0_score, lr_1_score),
        "mAP_D_value":
        classification_metric.AveragePrecisionScore().compute(
            lr_0_score, lr_1_score, lr_0_label, lr_1_label)
    }
    result_summary["distribution_metrics"] = {"hetero_lr": metric_lr}

    # train and test both run on intersection_0, hence the same table names
    data_summary = {
        "train": {
            "guest": guest_train_data["name"],
            "host": host_train_data["name"]
        },
        "test": {
            "guest": guest_train_data["name"],
            "host": host_train_data["name"]
        }
    }
    return data_summary, result_summary
def main(config="../../config.yaml", namespace=""):
    """Fit a homo logistic regression job with ``need_cv`` enabled (5 splits).

    Args:
        config: Job config object, or a path that is loaded with
            ``load_job_config``.
        namespace: Suffix appended to ``"experiment"`` to form the table
            namespace.

    The summary of ``homo_lr_0`` is printed as indented JSON; nothing is
    returned.
    """
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    party_ids = config.parties
    guest = party_ids.guest[0]
    host = party_ids.host[0]
    arbiter = party_ids.arbiter[0]

    table_namespace = f"experiment{namespace}"
    guest_train_data = {"name": "breast_homo_guest", "namespace": table_namespace}
    host_train_data = {"name": "breast_homo_host", "namespace": table_namespace}

    # initialize pipeline, then set the job initiator and the participants
    pipeline = PipeLine()
    pipeline.set_initiator(role='guest', party_id=guest)
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # Reader component: one table per party
    reader_0 = Reader(name="reader_0")
    reader_guest = reader_0.get_party_instance(role='guest', party_id=guest)
    reader_guest.component_param(table=guest_train_data)
    reader_host = reader_0.get_party_instance(role='host', party_id=host)
    reader_host.component_param(table=host_train_data)

    # DataTransform component (component numbering starts at 0)
    data_transform_0 = DataTransform(name="data_transform_0",
                                     with_label=True,
                                     output_format="dense")

    scale_0 = FeatureScale(name='scale_0')

    cv_settings = {
        "n_splits": 5,
        "shuffle": True,
        "random_seed": 33,
        "need_cv": True
    }
    param = {
        "penalty": "L2",
        "optimizer": "sgd",
        "tol": 1e-05,
        "alpha": 0.01,
        "max_iter": 3,
        "early_stop": "diff",
        "batch_size": 320,
        "learning_rate": 0.15,
        "decay": 1.0,
        "decay_sqrt": True,
        "init_param": {"init_method": "zeros"},
        "encrypt_param": {"method": None},
        "cv_param": cv_settings
    }
    homo_lr_0 = HomoLR(name='homo_lr_0', **param)

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0,
                           data=Data(data=reader_0.output.data))
    # the scaler consumes the transformed data and feeds the LR component
    pipeline.add_component(scale_0,
                           data=Data(data=data_transform_0.output.data))
    pipeline.add_component(homo_lr_0,
                           data=Data(train_data=scale_0.output.data))

    # compile pipeline once finished adding modules, this step will form conf
    # and dsl files for running job
    pipeline.compile()

    # fit model
    pipeline.fit()

    # query component summary
    lr_summary = pipeline.get_component("homo_lr_0").get_summary()
    print(json.dumps(lr_summary, indent=4, ensure_ascii=False))
def main(config="../../config.yaml", namespace=""):
    """Fit a hetero SecureBoost job with EINI inference, then run prediction.

    Args:
        config: Job config object, or a path that is loaded with
            ``load_job_config``.
        namespace: Suffix appended to ``"experiment"`` to form the table
            namespace.

    Prints the training summary of ``hetero_secure_boost_0`` and the first ten
    rows of the prediction output; nothing is returned.
    """
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    party_ids = config.parties
    guest = party_ids.guest[0]
    host = party_ids.host[0]

    def _table(name):
        # All tables share the "experiment<namespace>" namespace.
        return {"name": name, "namespace": f"experiment{namespace}"}

    # data sets: the validate tables reuse the training table names
    guest_train_data = _table("breast_hetero_guest")
    host_train_data = _table("breast_hetero_host")
    guest_validate_data = _table("breast_hetero_guest")
    host_validate_data = _table("breast_hetero_host")

    # init pipeline
    initiated = PipeLine().set_initiator(role="guest", party_id=guest)
    pipeline = initiated.set_roles(guest=guest, host=host)

    # set data reader and data transform
    reader_0 = Reader(name="reader_0")
    reader_1 = Reader(name="reader_1")
    reader_tables = (
        (reader_0, guest_train_data, host_train_data),
        (reader_1, guest_validate_data, host_validate_data),
    )
    for reader, guest_table, host_table in reader_tables:
        guest_side = reader.get_party_instance(role="guest", party_id=guest)
        guest_side.component_param(table=guest_table)
        host_side = reader.get_party_instance(role="host", party_id=host)
        host_side.component_param(table=host_table)

    data_transform_0 = DataTransform(name="data_transform_0")
    data_transform_1 = DataTransform(name="data_transform_1")
    for transform in (data_transform_0, data_transform_1):
        # only the guest side is configured with labels
        guest_side = transform.get_party_instance(role="guest", party_id=guest)
        guest_side.component_param(with_label=True, output_format="dense")
        host_side = transform.get_party_instance(role="host", party_id=host)
        host_side.component_param(with_label=False)

    # data intersect component
    intersect_0 = Intersection(name="intersection_0")
    intersect_1 = Intersection(name="intersection_1")

    # secure boost component
    boost_settings = dict(
        num_trees=3,
        task_type="classification",
        objective_param={"objective": "cross_entropy"},
        encrypt_param={"method": "Paillier"},
        tree_param={"max_depth": 3},
        validation_freqs=1,
        EINI_inference=True,
        EINI_random_mask=True,
    )
    hetero_secure_boost_0 = HeteroSecureBoost(name="hetero_secure_boost_0",
                                              **boost_settings)

    # evaluation component
    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")

    # wire the components in execution order
    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(data_transform_0,
                           data=Data(data=reader_0.output.data))
    pipeline.add_component(
        data_transform_1,
        data=Data(data=reader_1.output.data),
        model=Model(data_transform_0.output.model),
    )
    pipeline.add_component(intersect_0,
                           data=Data(data=data_transform_0.output.data))
    pipeline.add_component(intersect_1,
                           data=Data(data=data_transform_1.output.data))
    pipeline.add_component(
        hetero_secure_boost_0,
        data=Data(train_data=intersect_0.output.data,
                  validate_data=intersect_1.output.data),
    )
    pipeline.add_component(evaluation_0,
                           data=Data(data=hetero_secure_boost_0.output.data))

    pipeline.compile()
    pipeline.fit()

    print("fitting hetero secureboost done, result:")
    print(pipeline.get_component("hetero_secure_boost_0").get_summary())

    print('start to predict')

    # predict: deploy the required components from the training pipeline
    deployed = [data_transform_0, intersect_0, hetero_secure_boost_0, evaluation_0]
    pipeline.deploy_component(deployed)

    predict_pipeline = PipeLine()
    # add data reader onto predict pipeline
    predict_pipeline.add_component(reader_0)
    # add the train pipeline onto the predict pipeline and specify its data
    # source: reader_0's output feeds the input of data_transform_0
    predict_input = {
        pipeline.data_transform_0.input.data: reader_0.output.data
    }
    predict_pipeline.add_component(pipeline,
                                   data=Data(predict_input=predict_input))

    # run predict model
    predict_pipeline.predict()
    boost_component = predict_pipeline.get_component("hetero_secure_boost_0")
    predict_result = boost_component.get_output_data()
    print("Showing 10 data of predict result")
    for row in predict_result["data"][:10]:
        print(row)
def main(config="../../config.yaml", namespace=""):
    """Fit two chained hetero SSHE LR components and evaluate the second one.

    ``hetero_sshe_lr_1`` is added with the output model of
    ``hetero_sshe_lr_0`` and trained on the same intersection output.

    Args:
        config: Job config object, or a path that is loaded with
            ``load_job_config``.
        namespace: Suffix appended to ``"experiment"`` to form the table
            namespace.

    Returns:
        The fitted pipeline.
    """
    if isinstance(config, str):
        config = load_job_config(config)
    party_ids = config.parties
    guest = party_ids.guest[0]
    hosts = party_ids.host[0]

    table_namespace = f"experiment{namespace}"
    guest_train_data = {"name": "breast_hetero_guest", "namespace": table_namespace}
    host_train_data = {"name": "breast_hetero_host", "namespace": table_namespace}

    # initialize pipeline, then set the job initiator and the participants
    pipeline = PipeLine()
    pipeline.set_initiator(role='guest', party_id=guest)
    pipeline.set_roles(guest=guest, host=hosts)

    # Reader component: one table per party
    reader_0 = Reader(name="reader_0")
    reader_guest = reader_0.get_party_instance(role='guest', party_id=guest)
    reader_guest.component_param(table=guest_train_data)
    reader_host = reader_0.get_party_instance(role='host', party_id=hosts)
    reader_host.component_param(table=host_train_data)

    # DataTransform component: only the guest side is configured with labels
    data_transform_0 = DataTransform(name="data_transform_0",
                                     output_format='dense')
    transform_guest = data_transform_0.get_party_instance(role='guest',
                                                          party_id=guest)
    transform_guest.component_param(with_label=True)
    transform_host = data_transform_0.get_party_instance(role='host',
                                                         party_id=hosts)
    transform_host.component_param(with_label=False)

    # Intersection component
    intersection_0 = Intersection(name="intersection_0")

    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0,
                           data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0,
                           data=Data(data=data_transform_0.output.data))

    # settings shared by both LR components; they differ only in max_iter
    checkpoint_callbacks = {
        "callbacks": ["ModelCheckpoint"],
        "metrics": None,
        "use_first_metric_only": False,
        "save_freq": 1
    }
    lr_param = {
        "penalty": "L2",
        "optimizer": "rmsprop",
        "tol": 0.0001,
        "alpha": 0.01,
        "early_stop": "diff",
        "batch_size": -1,
        "learning_rate": 0.15,
        "init_param": {"init_method": "zeros", "fit_intercept": True},
        "encrypt_param": {"key_length": 1024},
        "reveal_strategy": "respectively",
        "reveal_every_iter": True,
        "callback_param": checkpoint_callbacks
    }

    hetero_sshe_lr_0 = HeteroSSHELR(name="hetero_sshe_lr_0", max_iter=3,
                                    **lr_param)
    hetero_sshe_lr_1 = HeteroSSHELR(name="hetero_sshe_lr_1", max_iter=30,
                                    **lr_param)

    pipeline.add_component(
        hetero_sshe_lr_0, data=Data(train_data=intersection_0.output.data))
    pipeline.add_component(
        hetero_sshe_lr_1,
        data=Data(train_data=intersection_0.output.data),
        model=Model(model=hetero_sshe_lr_0.output.model),
    )

    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")
    pipeline.add_component(evaluation_0,
                           data=Data(data=hetero_sshe_lr_1.output.data))

    pipeline.compile()

    # fit model
    pipeline.fit()

    # query component summary
    for component_name in ("hetero_sshe_lr_0", "hetero_sshe_lr_1", "evaluation_0"):
        prettify(pipeline.get_component(component_name).get_summary())

    return pipeline
def main(config="../../config.yaml", param="./hetero_nn_breast_config.yaml", namespace=""):
    """Run a hetero neural network benchmark job.

    Args:
        config: Job config object, or a path that is loaded with
            ``load_job_config``.
        param: Parameter mapping, or a path that is loaded with
            ``JobConfig.load_from_file``.
        namespace: Suffix appended to ``"experiment"`` to form the table
            namespace.

    Returns:
        A ``(data_summary, metric_summary)`` tuple: the table names used for
        train/test, and the parsed ``evaluation_0`` summary extended with a
        ``"distribution_metrics"`` entry.
    """
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    party_ids = config.parties
    guest = party_ids.guest[0]
    host = party_ids.host[0]
    backend = config.backend
    work_mode = config.work_mode

    guest_train_data = {
        "name": param["guest_table_name"],
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": param["host_table_name"],
        "namespace": f"experiment{namespace}"
    }

    initiated = PipeLine().set_initiator(role='guest', party_id=guest)
    pipeline = initiated.set_roles(guest=guest, host=host)

    # Reader component: one table per party
    reader_0 = Reader(name="reader_0")
    reader_guest = reader_0.get_party_instance(role='guest', party_id=guest)
    reader_guest.component_param(table=guest_train_data)
    reader_host = reader_0.get_party_instance(role='host', party_id=host)
    reader_host.component_param(table=host_train_data)

    # DataIO component: only the guest side is configured with labels
    dataio_0 = DataIO(name="dataio_0")
    dataio_guest = dataio_0.get_party_instance(role='guest', party_id=guest)
    dataio_guest.component_param(with_label=True)
    dataio_host = dataio_0.get_party_instance(role='host', party_id=host)
    dataio_host.component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")

    def _uniform_init():
        # Each layer gets its own initializer instance with the same settings.
        return initializers.RandomUniform(minval=-1, maxval=1, seed=123)

    hetero_nn_0 = HeteroNN(name="hetero_nn_0",
                           epochs=param["epochs"],
                           interactive_layer_lr=param["learning_rate"],
                           batch_size=param["batch_size"],
                           early_stop="diff")

    # bottom -> interactive -> top; each layer's input size is the previous
    # layer's unit count (the bottom layer takes 10 inputs)
    bottom_layer = Dense(units=param["bottom_layer_units"],
                         input_shape=(10, ),
                         activation="tanh",
                         kernel_initializer=_uniform_init())
    hetero_nn_0.add_bottom_model(bottom_layer)

    interactive_layer = Dense(units=param["interactive_layer_units"],
                              input_shape=(param["bottom_layer_units"], ),
                              activation="relu",
                              kernel_initializer=_uniform_init())
    hetero_nn_0.set_interactve_layer(interactive_layer)

    top_layer = Dense(units=param["top_layer_units"],
                      input_shape=(param["interactive_layer_units"], ),
                      activation=param["top_act"],
                      kernel_initializer=_uniform_init())
    hetero_nn_0.add_top_model(top_layer)

    # the optimizer class is looked up by name on ``optimizers``
    optimizer_cls = getattr(optimizers, param["opt"])
    opt = optimizer_cls(lr=param["learning_rate"])
    hetero_nn_0.compile(optimizer=opt,
                        metrics=param["metrics"],
                        loss=param["loss"])

    # hetero_nn_1 reuses the trained model for prediction
    hetero_nn_1 = HeteroNN(name="hetero_nn_1")

    eval_type = "multi" if param["loss"] == "categorical_crossentropy" else "binary"
    evaluation_0 = Evaluation(name="evaluation_0", eval_type=eval_type)

    # wire the components in execution order
    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0,
                           data=Data(data=dataio_0.output.data))
    pipeline.add_component(
        hetero_nn_0, data=Data(train_data=intersection_0.output.data))
    pipeline.add_component(
        hetero_nn_1,
        data=Data(test_data=intersection_0.output.data),
        model=Model(hetero_nn_0.output.model),
    )
    pipeline.add_component(evaluation_0,
                           data=Data(data=hetero_nn_0.output.data))

    pipeline.compile()

    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)

    # collect scores and labels of the train and predict components
    nn_0_data = pipeline.get_component("hetero_nn_0").get_output_data().get("data")
    nn_1_data = pipeline.get_component("hetero_nn_1").get_output_data().get("data")
    nn_0_score = extract_data(nn_0_data, "predict_result")
    nn_0_label = extract_data(nn_0_data, "label")
    nn_1_score = extract_data(nn_1_data, "predict_result")
    nn_1_label = extract_data(nn_1_data, "label")
    nn_0_score_label = extract_data(nn_0_data, "predict_result", keep_id=True)
    nn_1_score_label = extract_data(nn_1_data, "predict_result", keep_id=True)

    evaluation_summary = pipeline.get_component("evaluation_0").get_summary()
    metric_summary = parse_summary_result(evaluation_summary)

    if eval_type == "binary":
        metric_nn = {
            "score_diversity_ratio":
            classification_metric.Distribution.compute(nn_0_score_label,
                                                       nn_1_score_label),
            "ks_2samp":
            classification_metric.KSTest.compute(nn_0_score, nn_1_score),
            "mAP_D_value":
            classification_metric.AveragePrecisionScore().compute(
                nn_0_score, nn_1_score, nn_0_label, nn_1_label)
        }
        metric_summary["distribution_metrics"] = {"hetero_nn": metric_nn}
    elif eval_type == "multi":
        metric_nn = {
            "score_diversity_ratio":
            classification_metric.Distribution.compute(nn_0_score_label,
                                                       nn_1_score_label)
        }
        metric_summary["distribution_metrics"] = {"hetero_nn": metric_nn}

    # train and test both run on intersection_0, hence the same table names
    data_summary = {
        "train": {
            "guest": guest_train_data["name"],
            "host": host_train_data["name"]
        },
        "test": {
            "guest": guest_train_data["name"],
            "host": host_train_data["name"]
        }
    }
    return data_summary, metric_summary
def main(config="../../config.yaml", namespace=""):
    """Build and fit a hetero-LR pipeline with binning, selection and scaling.

    Three parallel branches (train / eval / test) are read, parsed by DataIO
    and intersected. Feature binning and data statistics are computed on the
    train branch and handed to feature selection as isometric models. In the
    DataIO, feature-selection and scale stages, each later branch receives
    the output model of the previous branch's component. ``hetero_lr_0``
    trains on the scaled train data and validates on the scaled eval data,
    ``hetero_lr_1`` predicts on the scaled test data with ``hetero_lr_0``'s
    model, and both outputs are passed to ``evaluation_0``. The summary of
    ``hetero_lr_0`` is printed once the job has been fitted.

    Args:
        config: Path to a job config file (loaded with ``load_job_config``)
            or an already-loaded config object exposing ``parties``.
        namespace: Suffix appended to ``"experiment"`` to form the namespace
            of every input table. The default ``""`` yields ``"experiment"``.
    """
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    # Honour the ``namespace`` argument instead of a hard-coded "experiment",
    # matching the other example scripts that build f"experiment{namespace}".
    table_namespace = f"experiment{namespace}"

    guest_train_data = {"name": "breast_hetero_guest", "namespace": table_namespace}
    guest_eval_data = {"name": "breast_hetero_guest", "namespace": table_namespace}
    guest_test_data = {"name": "breast_hetero_guest", "namespace": table_namespace}

    host_train_data = {"name": "breast_hetero_host_tag_value", "namespace": table_namespace}
    host_eval_data = {"name": "breast_hetero_host_tag_value", "namespace": table_namespace}
    host_test_data = {"name": "breast_hetero_host_tag_value", "namespace": table_namespace}

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # define Reader components to read in data (train / eval / test)
    reader_0 = Reader(name="reader_0")
    reader_1 = Reader(name="reader_1")
    reader_2 = Reader(name="reader_2")
    readers = (reader_0, reader_1, reader_2)

    # configure Reader for guest
    for reader, table in zip(readers, (guest_train_data, guest_eval_data, guest_test_data)):
        reader.get_party_instance(role='guest', party_id=guest).component_param(table=table)
    # configure Reader for host
    for reader, table in zip(readers, (host_train_data, host_eval_data, host_test_data)):
        reader.get_party_instance(role='host', party_id=host).component_param(table=table)

    # define DataIO components, one per branch
    dataio_0 = DataIO(name="dataio_0")
    dataio_1 = DataIO(name="dataio_1")
    dataio_2 = DataIO(name="dataio_2")
    dataios = (dataio_0, dataio_1, dataio_2)

    # configure DataIO for guest (labelled, dense input)
    guest_dataio_param = {
        "with_label": True,
        "label_name": "y",
        "label_type": "int",
        "output_format": "dense",
        "missing_fill": True,
        "missing_fill_method": "mean",
        "outlier_replace": False,
        "outlier_replace_method": "designated",
        "outlier_replace_value": 0.66,
        "outlier_impute": "-9999",
    }
    for dataio in dataios:
        dataio.get_party_instance(role='guest', party_id=guest).component_param(**guest_dataio_param)

    # configure DataIO for host (unlabelled, "tag" input format with values)
    host_dataio_param = {
        "input_format": "tag",
        "with_label": False,
        "tag_with_value": True,
        "delimitor": ";",
        "output_format": "dense",
    }
    for dataio in dataios:
        dataio.get_party_instance(role='host', party_id=host).component_param(**host_dataio_param)

    # define Intersection components
    intersection_0 = Intersection(name="intersection_0", intersect_method="raw")
    intersection_1 = Intersection(name="intersection_1", intersect_method="raw")
    intersection_2 = Intersection(name="intersection_2", intersect_method="raw")

    # define feature binning and statistics; both feed feature selection below
    binning_param = {
        "name": 'hetero_feature_binning_0',
        "method": 'optimal',
        "optimal_binning_param": {
            "metric_method": "iv",
            "init_bucket_method": "quantile",
        },
        "bin_indexes": -1,
    }
    hetero_feature_binning_0 = HeteroFeatureBinning(**binning_param)

    statistic_0 = DataStatistics(name='statistic_0')

    # define feature selection components
    selection_param = {
        "name": 'hetero_feature_selection_0',
        "filter_methods": ["unique_value", "iv_filter", "statistic_filter"],
        "unique_param": {
            "eps": 1e-6,
        },
        "iv_param": {
            "metrics": ["iv", "iv"],
            "filter_type": ["top_k", "threshold"],
            "take_high": [True, True],
            "threshold": [10, 0.1],
        },
        "statistic_param": {
            "metrics": ["coefficient_of_variance", "skewness"],
            "filter_type": ["threshold", "threshold"],
            "take_high": [True, False],
            "threshold": [0.001, -0.01],
        },
        "select_col_indexes": -1,
    }
    hetero_feature_selection_0 = HeteroFeatureSelection(**selection_param)
    hetero_feature_selection_1 = HeteroFeatureSelection(name='hetero_feature_selection_1')
    hetero_feature_selection_2 = HeteroFeatureSelection(name='hetero_feature_selection_2')

    # define feature scale components
    scale_param = {"name": "hetero_scale_0", "method": "standard_scale"}
    hetero_scale_0 = FeatureScale(**scale_param)
    hetero_scale_1 = FeatureScale(name='hetero_scale_1')
    hetero_scale_2 = FeatureScale(name='hetero_scale_2')

    # define the training and prediction LR components
    lr_param = {
        "penalty": "L2",
        "optimizer": "nesterov_momentum_sgd",
        "tol": 1e-4,
        "alpha": 0.01,
        "max_iter": 5,
        "early_stop": "diff",
        "batch_size": -1,
        "learning_rate": 0.15,
        "init_param": {
            "init_method": "zeros",
        },
        "validation_freqs": None,
        "early_stopping_rounds": None,
    }
    hetero_lr_0 = HeteroLR(name='hetero_lr_0', **lr_param)
    hetero_lr_1 = HeteroLR(name='hetero_lr_1')

    evaluation_0 = Evaluation(name='evaluation_0')

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(reader_2)

    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(dataio_1,
                           data=Data(data=reader_1.output.data),
                           model=Model(dataio_0.output.model))
    pipeline.add_component(dataio_2,
                           data=Data(data=reader_2.output.data),
                           model=Model(dataio_1.output.model))

    # set data input sources of intersection components
    pipeline.add_component(intersection_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(intersection_1, data=Data(data=dataio_1.output.data))
    pipeline.add_component(intersection_2, data=Data(data=dataio_2.output.data))

    # binning and statistics both run on the intersected train data
    pipeline.add_component(hetero_feature_binning_0,
                           data=Data(data=intersection_0.output.data))
    pipeline.add_component(statistic_0,
                           data=Data(data=intersection_0.output.data))

    # feature selection: the train branch uses binning + statistics as
    # isometric models; eval / test take the previous branch's selection model
    pipeline.add_component(
        hetero_feature_selection_0,
        data=Data(data=intersection_0.output.data),
        model=Model(isometric_model=[
            hetero_feature_binning_0.output.model,
            statistic_0.output.model,
        ]))
    pipeline.add_component(hetero_feature_selection_1,
                           data=Data(data=intersection_1.output.data),
                           model=Model(hetero_feature_selection_0.output.model))
    pipeline.add_component(hetero_feature_selection_2,
                           data=Data(data=intersection_2.output.data),
                           model=Model(hetero_feature_selection_1.output.model))

    # feature scaling, chained the same way as feature selection
    pipeline.add_component(hetero_scale_0,
                           data=Data(data=hetero_feature_selection_0.output.data))
    pipeline.add_component(hetero_scale_1,
                           data=Data(data=hetero_feature_selection_1.output.data),
                           model=Model(hetero_scale_0.output.model))
    pipeline.add_component(hetero_scale_2,
                           data=Data(data=hetero_feature_selection_2.output.data),
                           model=Model(hetero_scale_1.output.model))

    # set train & validate data of hetero_lr_0 component
    pipeline.add_component(hetero_lr_0,
                           data=Data(train_data=hetero_scale_0.output.data,
                                     validate_data=hetero_scale_1.output.data))
    # hetero_lr_1 predicts on the test branch with the model of hetero_lr_0
    pipeline.add_component(hetero_lr_1,
                           data=Data(test_data=hetero_scale_2.output.data),
                           model=Model(hetero_lr_0.output.model))

    pipeline.add_component(
        evaluation_0,
        data=Data(data=[hetero_lr_0.output.data, hetero_lr_1.output.data]))

    # compile pipeline once finished adding modules, this step will form conf and dsl files for running job
    pipeline.compile()

    # fit model
    pipeline.fit()
    # query component summary
    print(pipeline.get_component("hetero_lr_0").get_summary())
def main(config="../../config.yaml", namespace=""):
    """Build and fit a hetero-LR pipeline whose inputs are merged by Union.

    Four readers are created; ``union_0`` merges the outputs of readers 0/1
    and ``union_1`` merges the outputs of readers 2/3. Each union result is
    parsed by DataIO and intersected. Feature binning and data statistics are
    computed on the first branch and handed to feature selection as isometric
    models; the second branch's DataIO, feature-selection and scale
    components receive the output model of their first-branch counterpart.
    ``hetero_lr_0`` trains on the first scaled branch and validates on the
    second, its output is passed to ``evaluation_0``, and its summary is
    printed once the job has been fitted.

    Args:
        config: Path to a job config file (loaded with ``load_job_config``)
            or an already-loaded config object exposing ``parties``.
        namespace: Suffix appended to ``"experiment"`` to form the namespace
            of every input table. The default ``""`` yields ``"experiment"``.
    """
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    # Honour the ``namespace`` argument instead of a hard-coded "experiment",
    # matching the other example scripts that build f"experiment{namespace}".
    table_namespace = f"experiment{namespace}"

    guest_train_data_0 = {"name": "breast_hetero_guest", "namespace": table_namespace}
    guest_train_data_1 = {"name": "breast_hetero_guest", "namespace": table_namespace}
    guest_test_data_0 = {"name": "breast_hetero_guest", "namespace": table_namespace}
    guest_test_data_1 = {"name": "breast_hetero_guest", "namespace": table_namespace}

    host_train_data_0 = {"name": "breast_hetero_host_tag_value", "namespace": table_namespace}
    host_train_data_1 = {"name": "breast_hetero_host_tag_value", "namespace": table_namespace}
    host_test_data_0 = {"name": "breast_hetero_host_tag_value", "namespace": table_namespace}
    host_test_data_1 = {"name": "breast_hetero_host_tag_value", "namespace": table_namespace}

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    reader_1 = Reader(name="reader_1")
    reader_2 = Reader(name="reader_2")
    reader_3 = Reader(name="reader_3")
    readers = (reader_0, reader_1, reader_2, reader_3)

    # configure Reader for guest
    guest_tables = (guest_train_data_0, guest_train_data_1,
                    guest_test_data_0, guest_test_data_1)
    for reader, table in zip(readers, guest_tables):
        reader.get_party_instance(role='guest', party_id=guest).component_param(table=table)

    # configure Reader for host
    host_tables = (host_train_data_0, host_train_data_1,
                   host_test_data_0, host_test_data_1)
    for reader, table in zip(readers, host_tables):
        reader.get_party_instance(role='host', party_id=host).component_param(table=table)

    # define Union components; union_0 merges the train readers and union_1
    # the test readers (see the add_component calls below)
    union_0 = Union(name="union_0", keep_duplicate=True)
    union_1 = Union(name="union_1", keep_duplicate=True)

    # DataIO settings shared by the host side of both DataIO components
    host_dataio_param = {
        "input_format": "tag",
        "with_label": False,
        "tag_with_value": True,
        "delimitor": ";",
        "output_format": "dense",
    }

    # define DataIO components, one per union output
    dataio_0 = DataIO(name="dataio_0")
    dataio_1 = DataIO(name="dataio_1")

    # configure dataio_0 for guest, then for host
    dataio_0.get_party_instance(role='guest', party_id=guest).component_param(
        with_label=True, output_format="dense")
    dataio_0.get_party_instance(role='host', party_id=host).component_param(**host_dataio_param)

    # configure dataio_1 for guest, then for host
    dataio_1.get_party_instance(role='guest', party_id=guest).component_param(with_label=True)
    dataio_1.get_party_instance(role='host', party_id=host).component_param(**host_dataio_param)

    # define Intersection components
    intersection_0 = Intersection(name="intersection_0")
    intersection_1 = Intersection(name="intersection_1")

    # define feature binning and statistics; both feed feature selection below
    binning_param = {
        "name": 'hetero_feature_binning_0',
        "method": 'optimal',
        "optimal_binning_param": {
            "metric_method": "iv",
        },
        "bin_indexes": -1,
    }
    hetero_feature_binning_0 = HeteroFeatureBinning(**binning_param)

    statistic_0 = DataStatistics(name='statistic_0')

    # define feature selection components
    selection_param = {
        "name": 'hetero_feature_selection_0',
        "filter_methods": ["manually", "iv_filter", "statistic_filter"],
        "manually_param": {
            "filter_out_indexes": [1, 2],
            "filter_out_names": ["x2", "x3"],
        },
        "iv_param": {
            "metrics": ["iv", "iv"],
            "filter_type": ["top_k", "threshold"],
            "take_high": [True, True],
            "threshold": [10, 0.01],
        },
        "statistic_param": {
            "metrics": ["coefficient_of_variance", "skewness"],
            "filter_type": ["threshold", "threshold"],
            "take_high": [True, True],
            "threshold": [0.001, -0.01],
        },
        "select_col_indexes": -1,
    }
    hetero_feature_selection_0 = HeteroFeatureSelection(**selection_param)
    hetero_feature_selection_1 = HeteroFeatureSelection(name='hetero_feature_selection_1')

    # define feature scale components
    scale_param = {"name": "hetero_scale_0", "method": "standard_scale"}
    hetero_scale_0 = FeatureScale(**scale_param)
    hetero_scale_1 = FeatureScale(name='hetero_scale_1')

    # define the LR component
    lr_param = {
        "penalty": "L2",
        "validation_freqs": None,
        "early_stopping_rounds": None,
        "max_iter": 5,
    }
    hetero_lr_0 = HeteroLR(name='hetero_lr_0', **lr_param)

    evaluation_0 = Evaluation(name='evaluation_0')

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(reader_2)
    pipeline.add_component(reader_3)

    pipeline.add_component(
        union_0,
        data=Data(data=[reader_0.output.data, reader_1.output.data]))
    pipeline.add_component(
        union_1,
        data=Data(data=[reader_2.output.data, reader_3.output.data]))

    pipeline.add_component(dataio_0, data=Data(data=union_0.output.data))
    pipeline.add_component(dataio_1,
                           data=Data(data=union_1.output.data),
                           model=Model(dataio_0.output.model))

    # set data input sources of intersection components
    pipeline.add_component(intersection_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(intersection_1, data=Data(data=dataio_1.output.data))

    # binning and statistics both run on the first intersected branch
    pipeline.add_component(hetero_feature_binning_0,
                           data=Data(data=intersection_0.output.data))
    pipeline.add_component(statistic_0,
                           data=Data(data=intersection_0.output.data))

    # feature selection: the first branch uses binning + statistics as
    # isometric models; the second takes the first branch's selection model
    pipeline.add_component(
        hetero_feature_selection_0,
        data=Data(data=intersection_0.output.data),
        model=Model(isometric_model=[
            hetero_feature_binning_0.output.model,
            statistic_0.output.model,
        ]))
    pipeline.add_component(hetero_feature_selection_1,
                           data=Data(data=intersection_1.output.data),
                           model=Model(hetero_feature_selection_0.output.model))

    # feature scaling, chained the same way as feature selection
    pipeline.add_component(hetero_scale_0,
                           data=Data(data=hetero_feature_selection_0.output.data))
    pipeline.add_component(hetero_scale_1,
                           data=Data(data=hetero_feature_selection_1.output.data),
                           model=Model(hetero_scale_0.output.model))

    # set train & validate data of hetero_lr_0 component
    pipeline.add_component(hetero_lr_0,
                           data=Data(train_data=hetero_scale_0.output.data,
                                     validate_data=hetero_scale_1.output.data))

    pipeline.add_component(evaluation_0,
                           data=Data(data=[hetero_lr_0.output.data]))

    # compile pipeline once finished adding modules, this step will form conf and dsl files for running job
    pipeline.compile()

    # fit model
    pipeline.fit()
    # query component summary
    print(pipeline.get_component("hetero_lr_0").get_summary())
def main(config="../../config.yaml", namespace=""):
    """Run a hetero k-means job on the breast tables and print its summary.

    The pipeline is reader -> DataIO -> intersection -> ``hetero_kmeans_0``
    -> ``evaluation_0`` (``eval_type='clustering'``). ``backend`` and
    ``work_mode`` are read from the config and forwarded to ``pipeline.fit``.

    Args:
        config: Path to a job config file (loaded with ``load_job_config``)
            or an already-loaded config object exposing ``parties``,
            ``backend`` and ``work_mode``.
        namespace: Suffix appended to ``"experiment"`` to form the namespace
            of the guest and host input tables.
    """
    # a string is treated as a path to load; anything else is used as given
    if isinstance(config, str):
        config = load_job_config(config)

    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]
    backend = config.backend
    work_mode = config.work_mode

    table_namespace = f"experiment{namespace}"
    guest_train_data = {"name": "breast_hetero_guest", "namespace": table_namespace}
    host_train_data = {"name": "breast_hetero_host", "namespace": table_namespace}

    # pipeline with guest as initiator and all three roles registered
    pipeline = PipeLine()
    pipeline.set_initiator(role="guest", party_id=guest)
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # one reader; guest is configured first, then host
    reader_0 = Reader(name="reader_0")
    reader_tables = (
        ("guest", guest, guest_train_data),
        ("host", host, host_train_data),
    )
    for role, party_id, table in reader_tables:
        reader_0.get_party_instance(role=role, party_id=party_id).algorithm_param(table=table)

    # DataIO: guest data is labelled and dense, host data carries no label
    dataio_0 = DataIO(name="dataio_0")
    dataio_0.get_party_instance(role="guest", party_id=guest).algorithm_param(
        with_label=True, output_format="dense")
    host_dataio = dataio_0.get_party_instance(role="host", party_id=host)
    host_dataio.algorithm_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")
    hetero_kmeans_0 = HeteroKmeans(name="hetero_kmeans_0", k=3, max_iter=10)
    evaluation_0 = Evaluation(name="evaluation_0", eval_type="clustering")

    # register components in execution order
    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=dataio_0.output.data))
    # k-means trains on the intersected data
    pipeline.add_component(hetero_kmeans_0,
                           data=Data(train_data=intersection_0.output.data))
    print(f"data: {hetero_kmeans_0.output.data.data[0]}")
    # evaluation consumes only the first entry of the k-means data outputs
    pipeline.add_component(evaluation_0,
                           data=Data(data=hetero_kmeans_0.output.data.data[0]))

    # compiling generates the job conf and dsl
    pipeline.compile()

    pipeline.fit(backend=backend, work_mode=work_mode)

    summary = pipeline.get_component("hetero_kmeans_0").get_summary()
    print(summary)
def main(config="../../config.yaml", namespace=""):
    """Train and evaluate a multi-class hetero SSHE-LR job on vehicle data.

    Train and eval tables are read, transformed and intersected, then passed
    through manual feature selection. ``hetero_sshe_lr_0`` trains on the
    selected train data and validates on the selected eval data;
    ``hetero_sshe_lr_1`` predicts on the selected eval data with that model.
    Both outputs go to ``evaluation_0`` (``eval_type="multi"``).

    Args:
        config: Path to a job config file (loaded with ``load_job_config``)
            or an already-loaded config object exposing ``parties``.
        namespace: Suffix appended to ``"experiment"`` to form the namespace
            of every input table.

    Returns:
        The fitted ``PipeLine`` instance.
    """
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    hosts = parties.host[0]

    def table(name):
        # fresh table descriptor per call, all in the same namespace
        return {"name": name, "namespace": f"experiment{namespace}"}

    guest_train_data = table("vehicle_scale_hetero_guest")
    host_train_data = table("vehicle_scale_hetero_host")
    guest_eval_data = table("vehicle_scale_hetero_guest")
    host_eval_data = table("vehicle_scale_hetero_host")

    # pipeline with guest as initiator; no arbiter role in this job
    pipeline = PipeLine()
    pipeline.set_initiator(role="guest", party_id=guest)
    pipeline.set_roles(guest=guest, host=hosts)

    def build_reader(name, guest_table, host_table):
        # create a Reader, then point guest and host at their tables
        reader = Reader(name=name)
        reader.get_party_instance(
            role="guest", party_id=guest).component_param(table=guest_table)
        reader.get_party_instance(
            role="host", party_id=hosts).component_param(table=host_table)
        return reader

    reader_0 = build_reader("reader_0", guest_train_data, host_train_data)
    reader_1 = build_reader("reader_1", guest_eval_data, host_eval_data)

    data_transform_0 = DataTransform(name="data_transform_0", output_format="dense")
    data_transform_1 = DataTransform(name="data_transform_1", output_format="dense")

    # only data_transform_0 gets per-party settings; data_transform_1 is
    # given data_transform_0's output model when added to the pipeline
    data_transform_0.get_party_instance(
        role="guest", party_id=guest).component_param(with_label=True)
    data_transform_0.get_party_instance(
        role="host", party_id=hosts).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")
    intersection_1 = Intersection(name="intersection_1")

    # manual feature selection; the guest-side manual parameters are set
    # on the guest party instance only
    hetero_feature_selection_0 = HeteroFeatureSelection(
        name="hetero_feature_selection_0",
        select_col_indexes=-1,
        filter_methods=["manually"])
    guest_selection = hetero_feature_selection_0.get_party_instance(
        role="guest", party_id=guest)
    guest_selection.component_param(manually_param={"left_col_indexes": [0]})
    hetero_feature_selection_1 = HeteroFeatureSelection(
        name="hetero_feature_selection_1")

    # register the data preparation stages in execution order
    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(data_transform_0,
                           data=Data(data=reader_0.output.data))
    pipeline.add_component(data_transform_1,
                           data=Data(data=reader_1.output.data),
                           model=Model(data_transform_0.output.model))
    pipeline.add_component(intersection_0,
                           data=Data(data=data_transform_0.output.data))
    pipeline.add_component(intersection_1,
                           data=Data(data=data_transform_1.output.data))
    pipeline.add_component(hetero_feature_selection_0,
                           data=Data(data=intersection_0.output.data))
    pipeline.add_component(hetero_feature_selection_1,
                           data=Data(data=intersection_1.output.data),
                           model=Model(hetero_feature_selection_0.output.model))

    # training component
    hetero_sshe_lr_0 = HeteroSSHELR(
        name="hetero_sshe_lr_0",
        penalty=None,
        optimizer="sgd",
        tol=0.0001,
        alpha=0.01,
        max_iter=1,
        early_stop="diff",
        batch_size=-1,
        learning_rate=0.15,
        init_param={"init_method": "random_uniform"},
        reveal_strategy="encrypted_reveal_in_host",
        reveal_every_iter=False)
    pipeline.add_component(
        hetero_sshe_lr_0,
        data=Data(train_data=hetero_feature_selection_0.output.data,
                  validate_data=hetero_feature_selection_1.output.data))

    # prediction component reusing the trained model
    hetero_sshe_lr_1 = HeteroSSHELR(name="hetero_sshe_lr_1")
    pipeline.add_component(
        hetero_sshe_lr_1,
        data=Data(test_data=hetero_feature_selection_1.output.data),
        model=Model(hetero_sshe_lr_0.output.model))

    # evaluate the outputs of both LR components
    evaluation_data = [hetero_sshe_lr_0.output.data, hetero_sshe_lr_1.output.data]
    evaluation_0 = Evaluation(name="evaluation_0", eval_type="multi")
    pipeline.add_component(evaluation_0, data=Data(data=evaluation_data))

    pipeline.compile()

    # fit model
    pipeline.fit()

    # show the summaries of the training and evaluation components
    for component_name in ("hetero_sshe_lr_0", "evaluation_0"):
        prettify(pipeline.get_component(component_name).get_summary())

    return pipeline
def main(config="../../config.yaml", namespace=""):
    """Train and evaluate a binary hetero SSHE-LR job on the breast data.

    Train and eval tables are read, transformed and intersected.
    ``hetero_sshe_lr_0`` trains on the intersected train data and validates
    on the intersected eval data, configured with the ``EarlyStopping`` and
    ``PerformanceEvaluate`` callbacks; ``hetero_sshe_lr_1`` predicts on the
    intersected eval data with that model. Both outputs go to
    ``evaluation_0`` (``eval_type="binary"``).

    Args:
        config: Path to a job config file (loaded with ``load_job_config``)
            or an already-loaded config object exposing ``parties``.
        namespace: Suffix appended to ``"experiment"`` to form the namespace
            of every input table.

    Returns:
        The fitted ``PipeLine`` instance.
    """
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    hosts = parties.host[0]

    def table(name):
        # fresh table descriptor per call, all in the same namespace
        return {"name": name, "namespace": f"experiment{namespace}"}

    guest_train_data = table("breast_hetero_guest")
    host_train_data = table("breast_hetero_host")
    guest_eval_data = table("breast_hetero_guest")
    host_eval_data = table("breast_hetero_host")

    # pipeline with guest as initiator; no arbiter role in this job
    pipeline = PipeLine()
    pipeline.set_initiator(role="guest", party_id=guest)
    pipeline.set_roles(guest=guest, host=hosts)

    def build_reader(name, guest_table, host_table):
        # create a Reader, then point guest and host at their tables
        reader = Reader(name=name)
        reader.get_party_instance(
            role="guest", party_id=guest).component_param(table=guest_table)
        reader.get_party_instance(
            role="host", party_id=hosts).component_param(table=host_table)
        return reader

    reader_0 = build_reader("reader_0", guest_train_data, host_train_data)
    reader_1 = build_reader("reader_1", guest_eval_data, host_eval_data)

    data_transform_0 = DataTransform(name="data_transform_0", output_format="dense")
    data_transform_1 = DataTransform(name="data_transform_1", output_format="dense")

    # only data_transform_0 gets per-party settings; data_transform_1 is
    # given data_transform_0's output model when added to the pipeline
    data_transform_0.get_party_instance(
        role="guest", party_id=guest).component_param(with_label=True)
    data_transform_0.get_party_instance(
        role="host", party_id=hosts).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")
    intersection_1 = Intersection(name="intersection_1")

    # register the data preparation stages in execution order
    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(data_transform_0,
                           data=Data(data=reader_0.output.data))
    pipeline.add_component(data_transform_1,
                           data=Data(data=reader_1.output.data),
                           model=Model(data_transform_0.output.model))
    pipeline.add_component(intersection_0,
                           data=Data(data=data_transform_0.output.data))
    pipeline.add_component(intersection_1,
                           data=Data(data=data_transform_1.output.data))

    # training component with early stopping and per-iteration validation
    hetero_sshe_lr_0 = HeteroSSHELR(
        name="hetero_sshe_lr_0",
        penalty="L2",
        optimizer="rmsprop",
        tol=0.0001,
        alpha=0.01,
        max_iter=30,
        early_stop="diff",
        batch_size=-1,
        callback_param={
            "callbacks": ["EarlyStopping", "PerformanceEvaluate"],
            "validation_freqs": 1,
            "early_stopping_rounds": 3,
        },
        learning_rate=0.15,
        init_param={"init_method": "zeros"},
        reveal_strategy="respectively",
        reveal_every_iter=True)
    pipeline.add_component(hetero_sshe_lr_0,
                           data=Data(train_data=intersection_0.output.data,
                                     validate_data=intersection_1.output.data))

    # prediction component reusing the trained model
    hetero_sshe_lr_1 = HeteroSSHELR(name="hetero_sshe_lr_1")
    pipeline.add_component(hetero_sshe_lr_1,
                           data=Data(test_data=intersection_1.output.data),
                           model=Model(hetero_sshe_lr_0.output.model))

    # evaluate the outputs of both LR components
    evaluation_data = [hetero_sshe_lr_0.output.data, hetero_sshe_lr_1.output.data]
    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")
    pipeline.add_component(evaluation_0, data=Data(data=evaluation_data))

    pipeline.compile()

    # fit model
    pipeline.fit()

    # show the summaries of the training and evaluation components
    for component_name in ("hetero_sshe_lr_0", "evaluation_0"):
        prettify(pipeline.get_component(component_name).get_summary())

    return pipeline