def main(config="../../config.yaml", namespace=""):
    """Run a guest/host job that parses tag:value data and reuses the transform model.

    ``config`` is either a path given to ``load_job_config`` or an already
    loaded config object exposing ``parties``. ``namespace`` is appended to
    the ``experiment`` table namespace.
    """
    if isinstance(config, str):
        config = load_job_config(config)
    party_ids = config.parties
    guest = party_ids.guest[0]
    host = party_ids.host[0]

    # Both parties read the same table name in this example.
    guest_table = {"name": "tag_value_1000_140", "namespace": f"experiment{namespace}"}
    host_table = {"name": "tag_value_1000_140", "namespace": f"experiment{namespace}"}

    pipeline = (
        PipeLine()
        .set_initiator(role="guest", party_id=guest)
        .set_roles(guest=guest, host=host)
    )

    # Two readers over the same tables: one feeds the fitting transform,
    # the other feeds the transform that reuses the fitted model.
    readers = []
    for reader_name in ("reader_0", "reader_1"):
        reader = Reader(name=reader_name)
        reader.get_party_instance(role="guest", party_id=guest).component_param(table=guest_table)
        reader.get_party_instance(role="host", party_id=host).component_param(table=host_table)
        readers.append(reader)
    reader_0, reader_1 = readers

    data_transform_0 = DataTransform(
        name="data_transform_0",
        input_format="tag",
        tag_with_value=True,
        with_match_id=True,
    )
    data_transform_1 = DataTransform(name="data_transform_1")

    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(
        data_transform_1,
        data=Data(data=reader_1.output.data),
        model=Model(model=data_transform_0.output.model),
    )

    pipeline.compile()
    pipeline.fit()
def main(config="../../config.yaml", namespace=""):
    """Train a HeteroNN on the breast tables, then predict and evaluate.

    ``config`` is either a path given to ``load_job_config`` or an already
    loaded config object exposing ``parties``. ``namespace`` is appended to
    the ``experiment`` table namespace. The component config and the
    training summary are printed after the fit.
    """
    if isinstance(config, str):
        config = load_job_config(config)
    party_ids = config.parties
    guest = party_ids.guest[0]
    host = party_ids.host[0]

    guest_table = {"name": "breast_hetero_guest", "namespace": f"experiment{namespace}"}
    host_table = {"name": "breast_hetero_host", "namespace": f"experiment{namespace}"}

    pipeline = (
        PipeLine()
        .set_initiator(role="guest", party_id=guest)
        .set_roles(guest=guest, host=host)
    )

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role="guest", party_id=guest).component_param(table=guest_table)
    reader_0.get_party_instance(role="host", party_id=host).component_param(table=host_table)

    # Only the guest side is configured with a label column.
    data_transform_0 = DataTransform(name="data_transform_0")
    data_transform_0.get_party_instance(role="guest", party_id=guest).component_param(with_label=True)
    data_transform_0.get_party_instance(role="host", party_id=host).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")

    nn_settings = {
        "epochs": 100,
        "interactive_layer_lr": 0.15,
        "batch_size": -1,
        "early_stop": "diff",
        "selector_param": {"method": "relative"},
    }
    hetero_nn_0 = HeteroNN(name="hetero_nn_0", **nn_settings)

    def ones_initializer():
        # Every layer gets its own constant-1 initializer instance.
        return initializers.Constant(value=1)

    # Guest side: bottom model, interactive layer and top model.
    guest_nn_0 = hetero_nn_0.get_party_instance(role="guest", party_id=guest)
    guest_nn_0.add_bottom_model(
        Dense(units=3, input_shape=(10,), activation="relu",
              kernel_initializer=ones_initializer())
    )
    guest_nn_0.set_interactve_layer(
        Dense(units=2, input_shape=(2,), kernel_initializer=ones_initializer())
    )
    guest_nn_0.add_top_model(
        Dense(units=1, input_shape=(2,), activation="sigmoid",
              kernel_initializer=ones_initializer())
    )

    # Host side: bottom model and interactive layer only.
    host_nn_0 = hetero_nn_0.get_party_instance(role="host", party_id=host)
    host_nn_0.add_bottom_model(
        Dense(units=3, input_shape=(20,), activation="relu",
              kernel_initializer=ones_initializer())
    )
    host_nn_0.set_interactve_layer(
        Dense(units=2, input_shape=(2,), kernel_initializer=ones_initializer())
    )

    hetero_nn_0.compile(optimizer=optimizers.SGD(lr=0.15), loss="binary_crossentropy")

    hetero_nn_1 = HeteroNN(name="hetero_nn_1")
    evaluation_0 = Evaluation(name="evaluation_0")

    intersected = intersection_0.output.data
    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(hetero_nn_0, data=Data(train_data=intersected))
    # hetero_nn_1 predicts with the model trained by hetero_nn_0.
    pipeline.add_component(
        hetero_nn_1,
        data=Data(test_data=intersection_0.output.data),
        model=Model(model=hetero_nn_0.output.model),
    )
    pipeline.add_component(evaluation_0, data=Data(data=hetero_nn_0.output.data))

    pipeline.compile()
    pipeline.fit()

    print(hetero_nn_0.get_config(roles={"guest": [guest], "host": [host]}))
    print(pipeline.get_component("hetero_nn_0").get_summary())
def main(config="../../config.yaml", namespace=""):
    """Fit HomoFeatureBinning, then run a second binning that loads the first model.

    ``config`` is either a path given to ``load_job_config`` or an already
    loaded config object exposing ``parties``. ``namespace`` is appended to
    the ``experiment`` table namespace.
    """
    if isinstance(config, str):
        config = load_job_config(config)
    party_ids = config.parties
    guest = party_ids.guest[0]
    host = party_ids.host[0]
    arbiter = party_ids.arbiter[0]

    guest_table = {"name": "breast_homo_guest", "namespace": f"experiment{namespace}"}
    host_table = {"name": "breast_homo_host", "namespace": f"experiment{namespace}"}

    # Build the pipeline: initiator first, then the participating roles.
    pipeline = PipeLine()
    pipeline.set_initiator(role="guest", party_id=guest)
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # One reader, configured per party with that party's table.
    reader_0 = Reader(name="reader_0")
    for role, party_id, table in (("guest", guest, guest_table), ("host", host, host_table)):
        reader_0.get_party_instance(role=role, party_id=party_id).component_param(table=table)

    data_transform_0 = DataTransform(
        name="data_transform_0",
        with_label=True,
        output_format="dense",
    )

    homo_binning_0 = HomoFeatureBinning(name="homo_binning_0", sample_bins=1000)
    homo_binning_1 = HomoFeatureBinning(name="homo_binning_1", sample_bins=1000)

    # Components are added in execution order.
    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(homo_binning_0, data=Data(data=data_transform_0.output.data))
    # The second binning consumes the same data plus the first binning's model.
    pipeline.add_component(
        homo_binning_1,
        data=Data(data=data_transform_0.output.data),
        model=Model(model=homo_binning_0.output.model),
    )

    # compile() must follow the last add_component; fit() then runs the job.
    pipeline.compile()
    pipeline.fit()
def main(config="../../config.yaml", namespace=""):
    """Train HeteroLinR, continue training from its model, and evaluate the result.

    ``config`` is either a path given to ``load_job_config`` or an already
    loaded config object exposing ``parties``. ``namespace`` is appended to
    the ``experiment`` table namespace.
    """
    if isinstance(config, str):
        config = load_job_config(config)
    party_ids = config.parties
    guest = party_ids.guest[0]
    host = party_ids.host[0]
    arbiter = party_ids.arbiter[0]

    guest_table = {"name": "motor_hetero_guest", "namespace": f"experiment{namespace}"}
    host_table = {"name": "motor_hetero_host", "namespace": f"experiment{namespace}"}

    pipeline = (
        PipeLine()
        .set_initiator(role="guest", party_id=guest)
        .set_roles(guest=guest, host=host, arbiter=arbiter)
    )

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role="guest", party_id=guest).component_param(table=guest_table)
    reader_0.get_party_instance(role="host", party_id=host).component_param(table=host_table)

    # The guest holds the float label "motor_speed"; the host has no label.
    data_transform_0 = DataTransform(name="data_transform_0")
    guest_transform = data_transform_0.get_party_instance(role="guest", party_id=guest)
    guest_transform.component_param(
        with_label=True,
        label_name="motor_speed",
        label_type="float",
        output_format="dense",
    )
    host_transform = data_transform_0.get_party_instance(role="host", party_id=host)
    host_transform.component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0", only_output_key=False)

    # Settings common to both regression components.
    shared_linr_param = {
        "penalty": "L2",
        "optimizer": "sgd",
        "tol": 0.001,
        "alpha": 0.01,
        "early_stop": "weight_diff",
        "batch_size": -1,
        "learning_rate": 0.15,
        "decay": 0.0,
        "decay_sqrt": False,
        "floating_point_precision": 23,
    }
    hetero_linr_0 = HeteroLinR(
        name="hetero_linr_0",
        max_iter=5,
        callback_param={"callbacks": ["ModelCheckpoint"]},
        init_param={"init_method": "zeros"},
        **shared_linr_param,
    )
    evaluation_0 = Evaluation(name="evaluation_0", eval_type="regression", pos_label=1)
    hetero_linr_1 = HeteroLinR(name="hetero_linr_1", max_iter=15, **shared_linr_param)

    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(hetero_linr_0, data=Data(train_data=intersection_0.output.data))
    # hetero_linr_1 trains on the same data, starting from hetero_linr_0's model.
    pipeline.add_component(
        hetero_linr_1,
        data=Data(train_data=intersection_0.output.data),
        model=Model(hetero_linr_0.output.model),
    )
    pipeline.add_component(evaluation_0, data=Data(data=hetero_linr_1.output.data))

    pipeline.compile()
    pipeline.fit()
def main(config="../../config.yaml", namespace=""):
    """Run a guest-only job that unions two DataTransform outputs of one table.

    ``config`` is either a path given to ``load_job_config`` or an already
    loaded config object exposing ``parties``. ``namespace`` is appended to
    the ``experiment`` table namespace.
    """
    if isinstance(config, str):
        config = load_job_config(config)
    guest = config.parties.guest[0]

    guest_table = {"name": "breast_hetero_guest", "namespace": f"experiment{namespace}"}

    pipeline = (
        PipeLine()
        .set_initiator(role="guest", party_id=guest)
        .set_roles(guest=guest)
    )

    # Two readers over the same guest table.
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role="guest", party_id=guest).component_param(table=guest_table)
    reader_1 = Reader(name="reader_1")
    reader_1.get_party_instance(role="guest", party_id=guest).component_param(table=guest_table)

    # Both transforms share the same settings; only the name differs.
    transform_settings = {
        "with_label": True,
        "output_format": "dense",
        "label_name": "y",
        "missing_fill": False,
        "outlier_replace": False,
    }
    data_transform_0 = DataTransform(name="data_transform_0", **transform_settings)
    data_transform_1 = DataTransform(name="data_transform_1", **transform_settings)

    union_0 = Union(name="union_0", allow_missing=False, need_run=True)

    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    # The second transform loads the model produced by the first.
    pipeline.add_component(
        data_transform_1,
        data=Data(data=reader_1.output.data),
        model=Model(data_transform_0.output.model),
    )
    union_inputs = [data_transform_0.output.data, data_transform_1.output.data]
    pipeline.add_component(union_0, data=Data(data=union_inputs))

    pipeline.compile()
    pipeline.fit()
def main(config="../../config.yaml", namespace=""):
    """Split intersected data, train HeteroLinR on train/validate, predict on test.

    ``config`` is either a path given to ``load_job_config`` or an already
    loaded config object exposing ``parties``, ``backend`` and ``work_mode``.
    ``namespace`` is appended to the ``experiment`` table namespace.
    """
    if isinstance(config, str):
        config = load_job_config(config)
    party_ids = config.parties
    guest = party_ids.guest[0]
    host = party_ids.host[0]
    arbiter = party_ids.arbiter[0]
    backend = config.backend
    work_mode = config.work_mode

    guest_table = {"name": "motor_hetero_guest", "namespace": f"experiment{namespace}"}
    host_table = {"name": "motor_hetero_host", "namespace": f"experiment{namespace}"}

    pipeline = (
        PipeLine()
        .set_initiator(role="guest", party_id=guest)
        .set_roles(guest=guest, host=host, arbiter=arbiter)
    )

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role="guest", party_id=guest).component_param(table=guest_table)
    reader_0.get_party_instance(role="host", party_id=host).component_param(table=host_table)

    # The guest holds the float label "motor_speed"; the host has no label.
    dataio_0 = DataIO(name="dataio_0")
    dataio_0.get_party_instance(role="guest", party_id=guest).component_param(
        with_label=True,
        label_name="motor_speed",
        label_type="float",
        output_format="dense",
    )
    dataio_0.get_party_instance(role="host", party_id=host).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")

    hetero_data_split_0 = HeteroDataSplit(
        name="hetero_data_split_0",
        stratified=True,
        test_size=0.3,
        validate_size=0.2,
        split_points=[0.0, 0.2],
    )

    linr_settings = {
        "penalty": "L2",
        "optimizer": "sgd",
        "tol": 0.001,
        "alpha": 0.01,
        "max_iter": 10,
        "early_stop": "weight_diff",
        "batch_size": -1,
        "learning_rate": 0.15,
        "decay": 0.0,
        "decay_sqrt": False,
        "init_param": {"init_method": "zeros"},
        "encrypted_mode_calculator_param": {"mode": "fast"},
    }
    hetero_linr_0 = HeteroLinR(name="hetero_linr_0", **linr_settings)
    # NOTE(review): created without an explicit name, unlike every other
    # component here - confirm the name the framework assigns is intended.
    hetero_linr_1 = HeteroLinR()

    split_output = hetero_data_split_0.output.data
    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(hetero_data_split_0, data=Data(data=intersection_0.output.data))
    pipeline.add_component(
        hetero_linr_0,
        data=Data(
            train_data=split_output.train_data,
            validate_data=split_output.validate_data,
        ),
    )
    # The second component predicts on the test split with the trained model.
    pipeline.add_component(
        hetero_linr_1,
        data=Data(test_data=hetero_data_split_0.output.data.test_data),
        model=Model(model=hetero_linr_0.output.model),
    )

    pipeline.compile()

    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)
def main(config="../../config.yaml", namespace=""):
    """Train a HeteroNN with validation data, predict, and evaluate both outputs.

    ``config`` is either a path given to ``load_job_config`` or an already
    loaded config object exposing ``parties``, ``backend`` and ``work_mode``.
    ``namespace`` is appended to the ``experiment`` table namespace. The
    summaries of ``hetero_nn_0`` and ``evaluation_0`` are printed after the fit.
    """
    if isinstance(config, str):
        config = load_job_config(config)
    party_ids = config.parties
    guest = party_ids.guest[0]
    host = party_ids.host[0]
    backend = config.backend
    work_mode = config.work_mode

    guest_table = {"name": "breast_hetero_guest", "namespace": f"experiment{namespace}"}
    host_table = {"name": "breast_hetero_host", "namespace": f"experiment{namespace}"}

    pipeline = (
        PipeLine()
        .set_initiator(role="guest", party_id=guest)
        .set_roles(guest=guest, host=host)
    )

    # Two readers over the same tables: one for training, one for validation/test.
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role="guest", party_id=guest).algorithm_param(table=guest_table)
    reader_0.get_party_instance(role="host", party_id=host).algorithm_param(table=host_table)

    reader_1 = Reader(name="reader_1")
    reader_1.get_party_instance(role="guest", party_id=guest).algorithm_param(table=guest_table)
    reader_1.get_party_instance(role="host", party_id=host).algorithm_param(table=host_table)

    # Only the guest side is configured with a label column.
    dataio_0 = DataIO(name="dataio_0")
    dataio_0.get_party_instance(role="guest", party_id=guest).algorithm_param(with_label=True)
    dataio_0.get_party_instance(role="host", party_id=host).algorithm_param(with_label=False)

    dataio_1 = DataIO(name="dataio_1")
    dataio_1.get_party_instance(role="guest", party_id=guest).algorithm_param(with_label=True)
    dataio_1.get_party_instance(role="host", party_id=host).algorithm_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")
    intersection_1 = Intersection(name="intersection_1")

    nn_settings = {
        "epochs": 100,
        "validation_freqs": 1,
        "interactive_layer_lr": 0.15,
        "batch_size": -1,
        "early_stop": "diff",
        "early_stopping_rounds": 15,
        "use_first_metric_only": True,
    }
    hetero_nn_0 = HeteroNN(name="hetero_nn_0", **nn_settings)

    # Layers are built one at a time, in bottom / interactive / top order.
    bottom_layer = Dense(units=3, input_shape=(10, ), activation="relu",
                         kernel_initializer=initializers.Constant(value=1))
    hetero_nn_0.add_bottom_model(bottom_layer)
    interactive_layer = Dense(units=2, input_shape=(2, ),
                              kernel_initializer=initializers.Constant(value=1))
    hetero_nn_0.set_interactve_layer(interactive_layer)
    top_layer = Dense(units=1, input_shape=(2, ), activation="sigmoid",
                      kernel_initializer=initializers.Constant(value=1))
    hetero_nn_0.add_top_model(top_layer)

    hetero_nn_0.compile(
        optimizer=optimizers.SGD(lr=0.15),
        metrics=["AUC"],
        loss="binary_crossentropy",
    )

    hetero_nn_1 = HeteroNN(name="hetero_nn_1")
    evaluation_0 = Evaluation(name="evaluation_0")

    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(dataio_1, data=Data(data=reader_1.output.data))
    pipeline.add_component(intersection_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(intersection_1, data=Data(data=dataio_1.output.data))
    pipeline.add_component(
        hetero_nn_0,
        data=Data(
            train_data=intersection_0.output.data,
            validate_data=intersection_1.output.data,
        ),
    )
    # hetero_nn_1 predicts with the model trained by hetero_nn_0.
    pipeline.add_component(
        hetero_nn_1,
        data=Data(test_data=intersection_1.output.data),
        model=Model(model=hetero_nn_0.output.model),
    )
    evaluation_inputs = [hetero_nn_0.output.data, hetero_nn_1.output.data]
    pipeline.add_component(evaluation_0, data=Data(data=evaluation_inputs))

    pipeline.compile()
    pipeline.fit(backend=backend, work_mode=work_mode)

    for component_name in ("hetero_nn_0", "evaluation_0"):
        print(pipeline.get_component(component_name).get_summary())
def main(config="../../config.yaml", param="./sshe_linr_config.yaml", namespace=""):
    """Run a HeteroSSHELinR train/predict/evaluate job and collect its metrics.

    ``config`` is either a path given to ``load_job_config`` or an already
    loaded config object exposing ``parties``. ``param`` is either a path
    given to ``JobConfig.load_from_file`` or an already loaded mapping of
    model settings. ``namespace`` is appended to the ``experiment`` table
    namespace.

    Returns a ``(data_summary, metric_summary)`` tuple. ``data_summary`` names
    the guest/host tables for train and test. ``metric_summary`` is the parsed
    evaluation summary extended with a ``"script_metrics"`` entry.
    """
    if isinstance(config, str):
        config = load_job_config(config)
    party_ids = config.parties
    guest = party_ids.guest[0]
    host = party_ids.host[0]
    arbiter = party_ids.arbiter[0]

    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    guest_train_data = {"name": "motor_hetero_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "motor_hetero_host", "namespace": f"experiment{namespace}"}

    # Build the pipeline: initiator first, then the participating roles.
    pipeline = PipeLine()
    pipeline.set_initiator(role="guest", party_id=guest)
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # One reader, configured per party with that party's table.
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role="guest", party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role="host", party_id=host).component_param(table=host_train_data)

    # The guest holds the float label named in the loaded settings.
    data_transform_0 = DataTransform(name="data_transform_0")
    guest_transform = data_transform_0.get_party_instance(role="guest", party_id=guest)
    guest_transform.component_param(
        with_label=True,
        output_format="dense",
        label_name=param["label_name"],
        label_type="float",
    )
    data_transform_0.get_party_instance(role="host", party_id=host).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")

    # Model settings: selected keys from the loaded settings plus two fixed values.
    forwarded_keys = (
        "penalty",
        "max_iter",
        "optimizer",
        "learning_rate",
        "init_param",
        "batch_size",
        "alpha",
        "early_stop",
        "reveal_strategy",
    )
    linr_param = {key: param[key] for key in forwarded_keys}
    linr_param["tol"] = 1e-6
    linr_param["reveal_every_iter"] = True

    hetero_sshe_linr_0 = HeteroSSHELinR(name="hetero_sshe_linr_0", **linr_param)
    hetero_sshe_linr_1 = HeteroSSHELinR(name="hetero_sshe_linr_1")

    evaluation_metrics = [
        "r2_score",
        "mean_squared_error",
        "root_mean_squared_error",
        "explained_variance",
    ]
    evaluation_0 = Evaluation(
        name="evaluation_0",
        eval_type="regression",
        metrics=evaluation_metrics,
    )

    # Components are added in execution order.
    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(hetero_sshe_linr_0, data=Data(train_data=intersection_0.output.data))
    # The second component predicts with the model trained by the first.
    pipeline.add_component(
        hetero_sshe_linr_1,
        data=Data(test_data=intersection_0.output.data),
        model=Model(hetero_sshe_linr_0.output.model),
    )
    pipeline.add_component(evaluation_0, data=Data(data=hetero_sshe_linr_0.output.data))

    # compile() must follow the last add_component; fit() then runs the job.
    pipeline.compile()
    pipeline.fit()

    evaluation_summary = pipeline.get_component("evaluation_0").get_summary()
    metric_summary = parse_summary_result(evaluation_summary)

    # Describe the predictions of the train and predict components.
    train_output = pipeline.get_component("hetero_sshe_linr_0").get_output_data().get("data")
    data_linr_0 = extract_data(train_output, "predict_result")
    predict_output = pipeline.get_component("hetero_sshe_linr_1").get_output_data().get("data")
    data_linr_1 = extract_data(predict_output, "predict_result")
    desc_linr_0 = regression_metric.Describe().compute(data_linr_0)
    desc_linr_1 = regression_metric.Describe().compute(data_linr_1)

    metric_summary["script_metrics"] = {
        "linr_train": desc_linr_0,
        "linr_validate": desc_linr_1,
    }

    # Train and test use the same tables in this script.
    data_summary = {
        "train": {"guest": guest_train_data["name"], "host": host_train_data["name"]},
        "test": {"guest": guest_train_data["name"], "host": host_train_data["name"]},
    }
    return data_summary, metric_summary
def main(config="../../config.yaml", namespace=""):
    """Fit Pearson/binning-driven feature selection, then run a predict pipeline.

    ``config`` is either a path given to ``load_job_config`` or an already
    loaded config object exposing ``parties``, ``backend`` and ``work_mode``.
    ``namespace`` is appended to the ``experiment`` table namespace.
    """
    if isinstance(config, str):
        config = load_job_config(config)
    party_ids = config.parties
    guest = party_ids.guest[0]
    host = party_ids.host[0]
    backend = config.backend
    work_mode = config.work_mode

    guest_table = {"name": "breast_hetero_guest", "namespace": f"experiment{namespace}"}
    host_table = {"name": "breast_hetero_host", "namespace": f"experiment{namespace}"}

    pipeline = (
        PipeLine()
        .set_initiator(role="guest", party_id=guest)
        .set_roles(guest=guest, host=host)
    )

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role="guest", party_id=guest).component_param(table=guest_table)
    reader_0.get_party_instance(role="host", party_id=host).component_param(table=host_table)

    # Only the guest side is configured with a label column.
    dataio_0 = DataIO(name="dataio_0")
    dataio_0.get_party_instance(role="guest", party_id=guest).component_param(with_label=True)
    dataio_0.get_party_instance(role="host", party_id=host).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")

    hetero_pearson_0 = HeteroPearson(name="hetero_pearson_0", column_indexes=-1)
    hetero_binning_0 = HeteroFeatureBinning(name="hetero_binning_0")

    filter_settings = {
        "select_col_indexes": -1,
        "select_names": [],
        "filter_methods": ["vif_filter", "correlation_filter"],
        "vif_param": {"threshold": 5},
        "correlation_param": {"threshold": 0.5, "select_federated": False},
    }
    hetero_feature_selection_0 = HeteroFeatureSelection(
        name="hetero_feature_selection_0",
        **filter_settings,
    )

    intersected = intersection_0.output.data
    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(hetero_binning_0, data=Data(data=intersected))
    pipeline.add_component(hetero_pearson_0, data=Data(data=intersection_0.output.data))
    # Selection receives the Pearson and binning models as isometric models.
    isometric_models = [hetero_pearson_0.output.model, hetero_binning_0.output.model]
    pipeline.add_component(
        hetero_feature_selection_0,
        data=Data(data=intersection_0.output.data),
        model=Model(isometric_model=isometric_models),
    )

    pipeline.compile()
    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)

    # Prediction: deploy the listed components from the fitted pipeline.
    deployed_components = [
        dataio_0,
        intersection_0,
        hetero_binning_0,
        hetero_feature_selection_0,
    ]
    pipeline.deploy_component(deployed_components)

    # The predict pipeline starts from the reader, then attaches the fitted
    # pipeline with the reader output mapped onto its dataio_0 input.
    predict_pipeline = PipeLine()
    predict_pipeline.add_component(reader_0)
    predict_input = {pipeline.dataio_0.input.data: reader_0.output.data}
    predict_pipeline.add_component(pipeline, data=Data(predict_input=predict_input))

    predict_pipeline.predict(job_parameters)
def main(config="../../config.yaml", namespace=""):
    """Compare a warm-started HomoLR against one trained from scratch.

    ``homo_lr_1`` starts from the model of ``homo_lr_0``, while ``homo_lr_2``
    trains on the same data without a model input. Both outputs are evaluated
    together and the evaluation summary is passed to ``prettify``.

    ``config`` is either a path given to ``load_job_config`` or an already
    loaded config object exposing ``parties``. ``namespace`` is appended to
    the ``experiment`` table namespace. Returns the fitted pipeline.
    """
    if isinstance(config, str):
        config = load_job_config(config)
    party_ids = config.parties
    guest = party_ids.guest[0]
    hosts = party_ids.host[0]
    arbiter = party_ids.arbiter[0]

    guest_table = {"name": "breast_homo_guest", "namespace": f"experiment{namespace}"}
    host_table = {"name": "breast_homo_host", "namespace": f"experiment{namespace}"}

    # Build the pipeline: initiator first, then the participating roles.
    pipeline = PipeLine()
    pipeline.set_initiator(role="guest", party_id=guest)
    pipeline.set_roles(guest=guest, host=hosts, arbiter=arbiter)

    # One reader, configured per party with that party's table.
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role="guest", party_id=guest).component_param(table=guest_table)
    reader_0.get_party_instance(role="host", party_id=hosts).component_param(table=host_table)

    data_transform_0 = DataTransform(
        name="data_transform_0",
        output_format="dense",
        with_label=True,
    )

    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))

    # Settings shared by all three HomoLR components; only max_iter differs.
    lr_param = {
        "penalty": "L2",
        "optimizer": "sgd",
        "tol": 1e-05,
        "alpha": 0.01,
        "early_stop": "diff",
        "batch_size": -1,
        "learning_rate": 0.15,
        "decay": 1,
        "decay_sqrt": True,
        "init_param": {"init_method": "zeros"},
        "encrypt_param": {"method": None},
        "cv_param": {
            "n_splits": 4,
            "shuffle": True,
            "random_seed": 33,
            "need_cv": False,
        },
        "callback_param": {"callbacks": ["ModelCheckpoint", "EarlyStopping"]},
    }

    homo_lr_0 = HomoLR(name="homo_lr_0", max_iter=3, **lr_param)
    homo_lr_1 = HomoLR(name="homo_lr_1", max_iter=30, **lr_param)
    homo_lr_2 = HomoLR(name="homo_lr_2", max_iter=30, **lr_param)

    pipeline.add_component(homo_lr_0, data=Data(train_data=data_transform_0.output.data))
    pipeline.add_component(
        homo_lr_1,
        data=Data(train_data=data_transform_0.output.data),
        model=Model(model=homo_lr_0.output.model),
    )
    pipeline.add_component(homo_lr_2, data=Data(train_data=data_transform_0.output.data))

    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")
    evaluation_inputs = [homo_lr_1.output.data, homo_lr_2.output.data]
    pipeline.add_component(evaluation_0, data=Data(data=evaluation_inputs))

    pipeline.compile()
    pipeline.fit()

    # Show the evaluation summary after the fit.
    evaluation_summary = pipeline.get_component("evaluation_0").get_summary()
    prettify(evaluation_summary)
    return pipeline
def main(config="../../config.yaml", namespace=""):
    """Train HeteroSSHELinR with a second intersected data set as validation data.

    ``config`` is either a path given to ``load_job_config`` or an already
    loaded config object exposing ``parties``. ``namespace`` is appended to
    the ``experiment`` table namespace.
    """
    if isinstance(config, str):
        config = load_job_config(config)
    party_ids = config.parties
    guest = party_ids.guest[0]
    host = party_ids.host[0]

    # Index 0 feeds the training branch, index 1 the validation branch.
    # Both entries name the same table in this example.
    guest_tables = [
        {"name": "motor_hetero_guest", "namespace": f"experiment{namespace}"},
        {"name": "motor_hetero_guest", "namespace": f"experiment{namespace}"},
    ]
    host_tables = [
        {"name": "motor_hetero_host", "namespace": f"experiment{namespace}"},
        {"name": "motor_hetero_host", "namespace": f"experiment{namespace}"},
    ]

    pipeline = (
        PipeLine()
        .set_initiator(role="guest", party_id=guest)
        .set_roles(guest=guest, host=host)
    )

    readers = []
    for index, reader_name in enumerate(("reader_0", "reader_1")):
        reader = Reader(name=reader_name)
        reader.get_party_instance(role="guest", party_id=guest).component_param(
            table=guest_tables[index])
        reader.get_party_instance(role="host", party_id=host).component_param(
            table=host_tables[index])
        readers.append(reader)
    reader_0, reader_1 = readers

    data_transform_0 = DataTransform(name="data_transform_0")
    data_transform_1 = DataTransform(name="data_transform_1")
    # The guest holds the float label "motor_speed"; the host has no label.
    for transform in (data_transform_0, data_transform_1):
        transform.get_party_instance(role="guest", party_id=guest).component_param(
            with_label=True,
            label_name="motor_speed",
            label_type="float",
            output_format="dense",
        )
        transform.get_party_instance(role="host", party_id=host).component_param(
            with_label=False)

    intersection_0 = Intersection(name="intersection_0")
    intersection_1 = Intersection(name="intersection_1")

    callback_settings = {
        "callbacks": ["EarlyStopping", "PerformanceEvaluate"],
        "validation_freqs": 1,
        "early_stopping_rounds": 5,
        "metrics": ["mean_absolute_error", "root_mean_squared_error"],
        "use_first_metric_only": False,
        "save_freq": 1,
    }
    hetero_linr_0 = HeteroSSHELinR(
        name="hetero_linr_0",
        penalty="L2",
        optimizer="sgd",
        tol=0.001,
        alpha=0.01,
        max_iter=20,
        early_stop="weight_diff",
        batch_size=-1,
        learning_rate=0.15,
        decay=0.0,
        decay_sqrt=False,
        init_param={"init_method": "zeros"},
        callback_param=callback_settings,
        reveal_every_iter=True,
        reveal_strategy="respectively",
    )

    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    # The second transform loads the model produced by the first.
    pipeline.add_component(
        data_transform_1,
        data=Data(data=reader_1.output.data),
        model=Model(data_transform_0.output.model),
    )
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(intersection_1, data=Data(data=data_transform_1.output.data))
    pipeline.add_component(
        hetero_linr_0,
        data=Data(
            train_data=intersection_0.output.data,
            validate_data=intersection_1.output.data,
        ),
    )

    pipeline.compile()
    pipeline.fit()
def main(config="../../config.yaml", namespace=""):
    """Union two intersected data sets, train HeteroLR on the union, and evaluate.

    ``config`` is either a path given to ``load_job_config`` or an already
    loaded config object exposing ``parties``. ``namespace`` is appended to
    the ``experiment`` table namespace.
    """
    if isinstance(config, str):
        config = load_job_config(config)
    party_ids = config.parties
    guest = party_ids.guest[0]
    host = party_ids.host[0]
    arbiter = party_ids.arbiter[0]

    guest_table = {"name": "breast_hetero_guest", "namespace": f"experiment{namespace}"}
    host_table = {"name": "breast_hetero_host", "namespace": f"experiment{namespace}"}

    pipeline = (
        PipeLine()
        .set_initiator(role="guest", party_id=guest)
        .set_roles(guest=guest, host=host, arbiter=arbiter)
    )

    # Two readers over the same tables, one per branch of the union.
    readers = []
    for reader_name in ("reader_0", "reader_1"):
        reader = Reader(name=reader_name)
        reader.get_party_instance(role="guest", party_id=guest).component_param(table=guest_table)
        reader.get_party_instance(role="host", party_id=host).component_param(table=host_table)
        readers.append(reader)
    reader_0, reader_1 = readers

    data_transform_0 = DataTransform(name="data_transform_0")
    data_transform_1 = DataTransform(name="data_transform_1")
    # Only the guest side is configured with a label column.
    for transform in (data_transform_0, data_transform_1):
        transform.get_party_instance(role="guest", party_id=guest).component_param(
            with_label=True, output_format="dense")
        transform.get_party_instance(role="host", party_id=host).component_param(
            with_label=False)

    intersection_0 = Intersection(name="intersection_0")
    intersection_1 = Intersection(name="intersection_1")

    union_0 = Union(name="union_0")

    lr_settings = {
        "max_iter": 3,
        "early_stop": "weight_diff",
        "optimizer": "nesterov_momentum_sgd",
        "tol": 1E-4,
        "alpha": 0.01,
        "learning_rate": 0.15,
        "init_param": {"init_method": "random_uniform"},
    }
    hetero_lr_0 = HeteroLR(name="hetero_lr_0", **lr_settings)

    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary", pos_label=1)
    # Evaluation is switched off on the host side.
    evaluation_0.get_party_instance(role="host", party_id=host).component_param(need_run=False)

    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    # The second transform loads the model produced by the first.
    pipeline.add_component(
        data_transform_1,
        data=Data(data=reader_1.output.data),
        model=Model(data_transform_0.output.model),
    )
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(intersection_1, data=Data(data=data_transform_1.output.data))
    union_inputs = [intersection_0.output.data, intersection_1.output.data]
    pipeline.add_component(union_0, data=Data(data=union_inputs))
    pipeline.add_component(hetero_lr_0, data=Data(train_data=union_0.output.data))
    pipeline.add_component(evaluation_0, data=Data(data=hetero_lr_0.output.data))

    pipeline.compile()
    pipeline.fit()
def make_feature_engineering_dsl(config, namespace, lr_param, is_multi_host=False, has_validate=False,
                                 is_cv=False, is_ovr=False):
    """Assemble a compiled hetero LR pipeline preceded by feature engineering.

    The training line is reader -> data transform -> intersection ->
    feature scale -> feature binning -> feature selection -> one-hot
    encoder -> HeteroLR.

    Args:
        config: job config object exposing ``parties``.
        namespace: suffix appended to the ``experiment`` table namespace.
        lr_param: keyword arguments forwarded to ``HeteroLR``.
        is_multi_host: use every host party instead of only the first one.
        has_validate: add a second reader plus a mirrored ``*_1`` copy of
            every training-line component, feed the result to HeteroLR as
            validate data, and (unless ``is_cv``) add a ``hetero_lr_1``
            predicting on it.
        is_cv: compile and return right after HeteroLR is added, without
            any evaluation component.
        is_ovr: read the vehicle_scale tables instead of the breast tables.

    Returns:
        The compiled ``PipeLine``.
    """
    parties = config.parties
    guest = parties.guest[0]
    hosts = parties.host if is_multi_host else parties.host[0]
    arbiter = parties.arbiter[0]

    if is_ovr:
        guest_table, host_table = "vehicle_scale_hetero_guest", "vehicle_scale_hetero_host"
    else:
        guest_table, host_table = "breast_hetero_guest", "breast_hetero_host"

    def table_ref(table_name):
        # every call yields a fresh dict, so train and eval tables stay distinct objects
        return {"name": table_name, "namespace": f"experiment{namespace}"}

    guest_train_data = table_ref(guest_table)
    host_train_data = table_ref(host_table)
    guest_eval_data = table_ref(guest_table)
    host_eval_data = table_ref(host_table)

    # initialize pipeline, then set job initiator and participants
    pipeline = PipeLine()
    pipeline.set_initiator(role='guest', party_id=guest)
    pipeline.set_roles(guest=guest, host=hosts, arbiter=arbiter)

    # reader: one table per role
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=hosts).component_param(table=host_train_data)

    # data transform: only the guest side is configured with a label
    data_transform_0 = DataTransform(name="data_transform_0")
    guest_transform = data_transform_0.get_party_instance(role='guest', party_id=guest)
    guest_transform.component_param(with_label=True, output_format="dense")
    host_transform = data_transform_0.get_party_instance(role='host', party_id=hosts)
    host_transform.component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")

    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))

    feature_scale_0 = FeatureScale(name='feature_scale_0', method="standard_scale", need_run=True)
    pipeline.add_component(feature_scale_0, data=Data(data=intersection_0.output.data))

    hetero_feature_binning_0 = HeteroFeatureBinning(
        name='hetero_feature_binning_0',
        method="quantile",
        compress_thres=10000,
        head_size=10000,
        error=0.001,
        bin_num=10,
        bin_indexes=-1,
        adjustment_factor=0.5,
        local_only=False,
        need_run=True,
        transform_param={
            "transform_cols": -1,
            "transform_type": "bin_num"
        },
    )
    pipeline.add_component(hetero_feature_binning_0, data=Data(data=feature_scale_0.output.data))

    hetero_feature_selection_0 = HeteroFeatureSelection(
        name='hetero_feature_selection_0',
        select_col_indexes=-1,
        filter_methods=["manually", "iv_value_thres", "iv_percentile"],
        manually_param={"filter_out_indexes": None},
        iv_value_param={"value_threshold": 1.0},
        iv_percentile_param={"percentile_threshold": 0.9},
        need_run=True,
    )
    # selection receives the binning model as an isometric model
    pipeline.add_component(
        hetero_feature_selection_0,
        data=Data(data=hetero_feature_binning_0.output.data),
        model=Model(isometric_model=[hetero_feature_binning_0.output.model]),
    )

    one_hot_encoder_0 = OneHotEncoder(
        name='one_hot_encoder_0',
        transform_col_indexes=-1,
        transform_col_names=None,
        need_run=True,
    )
    pipeline.add_component(one_hot_encoder_0, data=Data(data=hetero_feature_selection_0.output.data))

    # components to mirror on the validation line, in execution order
    train_line = [
        data_transform_0,
        intersection_0,
        feature_scale_0,
        hetero_feature_binning_0,
        hetero_feature_selection_0,
        one_hot_encoder_0,
    ]

    last_cpn = None
    if has_validate:
        reader_1 = Reader(name="reader_1")
        reader_1.get_party_instance(role='guest', party_id=guest).component_param(table=guest_eval_data)
        reader_1.get_party_instance(role='host', party_id=hosts).component_param(table=host_eval_data)
        pipeline.add_component(reader_1)
        last_cpn = reader_1

        for train_cpn in train_line:
            # swap the trailing index of the training component's name for "1"
            mirrored_name = "_".join([*train_cpn.name.split('_')[:-1], '1'])
            validate_cpn = type(train_cpn)(name=mirrored_name)
            has_model_output = hasattr(train_cpn.output, "model")
            upstream_data = Data(data=last_cpn.output.data)
            if has_model_output:
                pipeline.add_component(validate_cpn, data=upstream_data, model=Model(train_cpn.output.model))
            else:
                pipeline.add_component(validate_cpn, data=upstream_data)
            last_cpn = validate_cpn

    hetero_lr_0 = HeteroLR(**lr_param)
    if has_validate:
        lr_input = Data(train_data=one_hot_encoder_0.output.data,
                        validate_data=last_cpn.output.data)
    else:
        lr_input = Data(train_data=one_hot_encoder_0.output.data)
    pipeline.add_component(hetero_lr_0, data=lr_input)

    if is_cv:
        pipeline.compile()
        return pipeline

    evaluation_data = [hetero_lr_0.output.data]
    if has_validate:
        # second LR component predicts on the validation line with the trained model
        hetero_lr_1 = HeteroLR(name='hetero_lr_1')
        pipeline.add_component(hetero_lr_1,
                               data=Data(test_data=last_cpn.output.data),
                               model=Model(hetero_lr_0.output.model))
        evaluation_data.append(hetero_lr_1.output.data)

    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")
    pipeline.add_component(evaluation_0, data=Data(data=evaluation_data))

    pipeline.compile()
    return pipeline
def main(config="../../config.yaml", param="./vechile_config.yaml", namespace=""):
    """Run the hetero LR benchmark on the vehicle data set and summarise the result.

    The pipeline is reader -> DataIO -> intersection -> ``hetero_lr_0``
    (train) -> ``hetero_lr_1`` (predict on the same intersected data with
    the trained model) -> evaluation of the ``hetero_lr_0`` output.

    Args:
        config: job config object, or a path handed to ``load_job_config``.
        param: benchmark parameters as a dict, or a path handed to
            ``JobConfig.load_from_file``. It must contain ``data_guest``,
            ``penalty``, ``max_iter``, ``alpha``, ``learning_rate``,
            ``optimizer`` and ``batch_size``; ``init_method`` and
            ``random_seed`` are optional.
        namespace: suffix appended to the ``experiment`` table namespace.

    Returns:
        A ``(data_summary, result_summary)`` tuple. ``data_summary`` names
        the guest/host tables used for train and test; ``result_summary``
        is the parsed evaluation summary extended with a
        ``distribution_metrics`` entry.

    Raises:
        TypeError: if ``param`` is not a dict after loading.
        ValueError: if ``data_guest`` is missing or names an unknown data set.
    """
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]
    backend = config.backend
    work_mode = config.work_mode

    if isinstance(param, str):
        param = JobConfig.load_from_file(param)
    # explicit check instead of ``assert`` so it still runs under ``python -O``
    if not isinstance(param, dict):
        raise TypeError(f"param must be a dict, got {type(param).__name__}")

    # Example values, for reference only:
    #   guest = 9999, host = 10000, arbiter = 9999, backend = 0, work_mode = 1
    #   param = {"penalty": "L2", "max_iter": 5}

    data_guest = param.get("data_guest")
    if not isinstance(data_guest, str):
        # without this check a missing key surfaces as AttributeError on None
        raise ValueError("param must provide 'data_guest' as a path string")
    data_set = data_guest.split('/')[-1]
    if data_set == "vehicle_scale_hetero_guest.csv":
        guest_data_table = 'vehicle_scale_hetero_guest'
        host_data_table = 'vehicle_scale_hetero_host'
    else:
        raise ValueError(f"Cannot recognized data_set: {data_set}")

    guest_train_data = {"name": guest_data_table, "namespace": f"experiment{namespace}"}
    host_train_data = {"name": host_data_table, "namespace": f"experiment{namespace}"}

    # initialize pipeline, then set job initiator and participants
    pipeline = PipeLine()
    pipeline.set_initiator(role='guest', party_id=guest)
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # define Reader component and configure one table per role
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    # define DataIO component; only the guest side is configured with a label
    dataio_0 = DataIO(name="dataio_0")
    dataio_0_guest_party_instance = dataio_0.get_party_instance(role='guest', party_id=guest)
    dataio_0_guest_party_instance.component_param(with_label=True, output_format="dense")
    dataio_0.get_party_instance(role='host', party_id=host).component_param(with_label=False)

    # define Intersection component
    intersection_0 = Intersection(name="intersection_0")

    # key order is kept stable because the dict is printed below
    lr_param = {
        "validation_freqs": None,
        "early_stopping_rounds": None,
        "penalty": param["penalty"],
        "max_iter": param["max_iter"],
        "alpha": param["alpha"],
        "learning_rate": param["learning_rate"],
        "optimizer": param["optimizer"],
        "batch_size": param["batch_size"],
        "early_stop": "diff",
        "init_param": {
            "init_method": param.get("init_method", 'random_uniform'),
            "random_seed": param.get("random_seed", 103)
        }
    }
    print(f"lr_param: {lr_param}, data_set: {data_set}")

    hetero_lr_0 = HeteroLR(name='hetero_lr_0', **lr_param)
    hetero_lr_1 = HeteroLR(name='hetero_lr_1')

    evaluation_0 = Evaluation(name='evaluation_0', eval_type="multi")

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(hetero_lr_0, data=Data(train_data=intersection_0.output.data))
    # hetero_lr_1 predicts on the same intersected data using hetero_lr_0's model
    pipeline.add_component(hetero_lr_1,
                           data=Data(test_data=intersection_0.output.data),
                           model=Model(hetero_lr_0.output.model))
    pipeline.add_component(evaluation_0, data=Data(data=hetero_lr_0.output.data))

    # compile pipeline once finished adding modules
    pipeline.compile()

    # fit model
    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)

    # query component summary and output data
    result_summary = parse_summary_result(pipeline.get_component("evaluation_0").get_summary())
    lr_0_data = pipeline.get_component("hetero_lr_0").get_output_data().get("data")
    lr_1_data = pipeline.get_component("hetero_lr_1").get_output_data().get("data")
    lr_0_score_label = extract_data(lr_0_data, "predict_result", keep_id=True)
    lr_1_score_label = extract_data(lr_1_data, "predict_result", keep_id=True)
    metric_lr = {
        "score_diversity_ratio": classification_metric.Distribution.compute(lr_0_score_label, lr_1_score_label)
    }
    result_summary["distribution_metrics"] = {"hetero_lr": metric_lr}

    # train and test use the same tables in this benchmark
    data_summary = {
        "train": {"guest": guest_train_data["name"], "host": host_train_data["name"]},
        "test": {"guest": guest_train_data["name"], "host": host_train_data["name"]}
    }
    return data_summary, result_summary
def main(config="../../config.yaml", namespace=""):
    """Build and fit a homo LR pipeline on the ``experiment_sid`` breast tables.

    Train and eval branches each run reader -> data transform -> feature
    scale; the eval branch takes the models produced by the train branch.
    HomoLR trains on the scaled train data with the scaled eval data as
    validate data, and its output goes to Evaluation. The summaries of
    ``homo_lr_0`` and ``evaluation_0`` are printed as JSON.

    Args:
        config: job config object, or a path handed to ``load_job_config``.
        namespace: suffix appended to the ``experiment_sid`` table namespace.
    """
    # a string is treated as the location of the job config
    job_config = load_job_config(config) if isinstance(config, str) else config
    parties = job_config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    def table_ref(table_name):
        # fresh dict per call so train and eval tables stay distinct objects
        return {"name": table_name, "namespace": f"experiment_sid{namespace}"}

    guest_train_data = table_ref("breast_homo_guest")
    host_train_data = table_ref("breast_homo_host")
    guest_eval_data = table_ref("breast_homo_guest")
    host_eval_data = table_ref("breast_homo_host")

    # initialize pipeline, then set job initiator and participants
    pipeline = PipeLine()
    pipeline.set_initiator(role='guest', party_id=guest)
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # readers: reader_0 loads the train tables, reader_1 the eval tables
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    reader_1 = Reader(name="reader_1")
    reader_1.get_party_instance(role='guest', party_id=guest).component_param(table=guest_eval_data)
    reader_1.get_party_instance(role='host', party_id=host).component_param(table=host_eval_data)

    # data transform components
    data_transform_0 = DataTransform(
        name="data_transform_0",
        with_match_id=True,
        with_label=True,
        output_format="dense",
    )
    data_transform_1 = DataTransform(name="data_transform_1")

    scale_0 = FeatureScale(name='scale_0')
    scale_1 = FeatureScale(name='scale_1')

    homo_lr_0 = HomoLR(
        name='homo_lr_0',
        penalty="L2",
        optimizer="sgd",
        tol=1e-05,
        alpha=0.01,
        max_iter=3,
        early_stop="diff",
        batch_size=320,
        learning_rate=0.15,
        validation_freqs=1,
        init_param={"init_method": "zeros"},
        encrypt_param={"method": None},
        cv_param={
            "n_splits": 4,
            "shuffle": True,
            "random_seed": 33,
            "need_cv": False
        },
    )

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(data_transform_1,
                           data=Data(data=reader_1.output.data),
                           model=Model(data_transform_0.output.model))

    # scale components; scale_1 takes the model produced by scale_0
    pipeline.add_component(scale_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(scale_1,
                           data=Data(data=data_transform_1.output.data),
                           model=Model(scale_0.output.model))

    lr_input = Data(train_data=scale_0.output.data, validate_data=scale_1.output.data)
    pipeline.add_component(homo_lr_0, data=lr_input)

    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")
    # evaluation is switched off on the host side
    evaluation_0.get_party_instance(role='host', party_id=host).component_param(need_run=False)
    pipeline.add_component(evaluation_0, data=Data(data=homo_lr_0.output.data))

    # compile pipeline once finished adding modules
    pipeline.compile()

    # fit model
    pipeline.fit()

    # query component summaries
    for component_name in ("homo_lr_0", "evaluation_0"):
        summary = pipeline.get_component(component_name).get_summary()
        print(json.dumps(summary, indent=4, ensure_ascii=False))
def main():
    """Train a hetero LR model with hard-coded parties, then run a predict job.

    Training pipeline: two reader -> DataIO -> intersection branches feed
    ``hetero_lr_0`` as train and validate data. After fitting, the summary
    of ``hetero_lr_0`` is printed, ``dataio_0``, ``intersection_0`` and
    ``hetero_lr_0`` are deployed, and a second pipeline with its own reader
    runs prediction on the eval tables.
    """
    import json

    # parties config
    guest, host, arbiter = 9999, 10000, 10000

    # 0 for eggroll, 1 for spark
    backend = Backend.EGGROLL
    # 0 for standalone, 1 for cluster
    # (use WorkMode.CLUSTER instead for cluster deployment)
    work_mode = WorkMode.STANDALONE

    # input data name & namespace in database
    guest_train_data = {"name": "breast_hetero_guest", "namespace": "experiment"}
    host_train_data = {"name": "breast_hetero_host", "namespace": "experiment"}
    guest_eval_data = {"name": "breast_hetero_guest", "namespace": "experiment"}
    host_eval_data = {"name": "breast_hetero_host", "namespace": "experiment"}

    # initialize pipeline, then set job initiator and participants
    pipeline = PipeLine()
    pipeline.set_initiator(role="guest", party_id=guest)
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # readers: reader_0 loads the train tables, reader_1 the eval tables
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role="guest", party_id=guest).algorithm_param(table=guest_train_data)
    reader_0.get_party_instance(role="host", party_id=host).algorithm_param(table=host_train_data)

    reader_1 = Reader(name="reader_1")
    reader_1.get_party_instance(role="guest", party_id=guest).algorithm_param(table=guest_eval_data)
    reader_1.get_party_instance(role="host", party_id=host).algorithm_param(table=host_eval_data)

    # DataIO components; only dataio_0 is configured per party
    dataio_0 = DataIO(name="dataio_0")
    dataio_1 = DataIO(name="dataio_1")
    guest_dataio = dataio_0.get_party_instance(role="guest", party_id=guest)
    guest_dataio.algorithm_param(with_label=True, output_format="dense")
    host_dataio = dataio_0.get_party_instance(role="host", party_id=host)
    host_dataio.algorithm_param(with_label=False)

    # Intersection components
    intersection_0 = Intersection(name="intersection_0")
    intersection_1 = Intersection(name="intersection_1")

    # HeteroLR component
    hetero_lr_0 = HeteroLR(
        name="hetero_lr_0",
        early_stop="weight_diff",
        learning_rate=0.15,
        optimizer="rmsprop",
        max_iter=10,
        early_stopping_rounds=2,
        validation_freqs=1,
    )

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    # dataio_1 takes the model produced by dataio_0
    pipeline.add_component(dataio_1,
                           data=Data(data=reader_1.output.data),
                           model=Model(dataio_0.output.model))
    pipeline.add_component(intersection_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(intersection_1, data=Data(data=dataio_1.output.data))
    # train & validate data of hetero_lr_0
    lr_input = Data(train_data=intersection_0.output.data,
                    validate_data=intersection_1.output.data)
    pipeline.add_component(hetero_lr_0, data=lr_input)

    # compile pipeline once finished adding modules
    pipeline.compile()

    # fit model
    pipeline.fit(backend=backend, work_mode=work_mode)

    # query component summary
    lr_summary = pipeline.get_component("hetero_lr_0").get_summary()
    print(json.dumps(lr_summary, indent=4))

    # predict: deploy required components first
    pipeline.deploy_component([dataio_0, intersection_0, hetero_lr_0])

    # initiate predict pipeline with its own reader on the eval tables
    predict_pipeline = PipeLine()
    reader_2 = Reader(name="reader_2")
    reader_2.get_party_instance(role="guest", party_id=guest).algorithm_param(table=guest_eval_data)
    reader_2.get_party_instance(role="host", party_id=host).algorithm_param(table=host_eval_data)
    predict_pipeline.add_component(reader_2)

    # add the deployed train pipeline, mapping reader_2's output onto dataio_0's input
    predict_source = {pipeline.dataio_0.input.data: reader_2.output.data}
    predict_pipeline.add_component(pipeline, data=Data(predict_input=predict_source))

    # run predict model
    predict_pipeline.predict(backend=backend, work_mode=work_mode)
def main(config="../../config.yaml", namespace=""):
    """Build and fit a PSI pipeline over the ``expect`` and ``actual`` tables.

    Two reader -> data transform branches read the same tables (guest reads
    ``expect``, host reads ``actual``); PSI receives the first branch as
    train data and the second as validate data.

    Args:
        config: job config object, or a path handed to ``load_job_config``.
        namespace: suffix appended to the ``experiment`` table namespace.
    """
    # a string is treated as the location of the job config
    job_config = load_job_config(config) if isinstance(config, str) else config
    parties = job_config.parties
    guest = parties.guest[0]
    host = parties.host[0]

    table_namespace = f"experiment{namespace}"
    guest_train_data = {"name": "expect", "namespace": table_namespace}
    host_train_data = {"name": "actual", "namespace": table_namespace}

    pipeline = PipeLine().set_initiator(role='guest', party_id=guest)
    pipeline = pipeline.set_roles(guest=guest, host=host)

    # both readers point at the same pair of tables
    readers = []
    for reader_name in ("reader_0", "reader_1"):
        reader = Reader(name=reader_name)
        reader.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
        reader.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)
        readers.append(reader)
    reader_0, reader_1 = readers

    data_transform_0 = DataTransform(name="data_transform_0")
    data_transform_1 = DataTransform(name="data_transform_1")
    # neither side carries a label; guest instances are configured before host ones
    for role, party_id in (('guest', guest), ('host', host)):
        for transform in (data_transform_0, data_transform_1):
            party_instance = transform.get_party_instance(role=role, party_id=party_id)
            party_instance.component_param(with_label=False, output_format="dense")

    psi_0 = PSI(name='psi_0', max_bin_num=20)

    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(data_transform_1,
                           data=Data(data=reader_1.output.data),
                           model=Model(data_transform_0.output.model))
    psi_input = Data(train_data=data_transform_0.output.data,
                     validate_data=data_transform_1.output.data)
    pipeline.add_component(psi_0, data=psi_input)

    pipeline.compile()
    pipeline.fit()
def main(config="../../config.yaml", namespace=""):
    """Build and fit a homo one-hot -> scale -> homo LR pipeline on ``mock_string``.

    Train and eval branches each run reader -> data transform -> homo
    one-hot encoder -> feature scale; every eval-side component takes the
    model of its train-side counterpart. ``homo_lr_0`` trains on the train
    branch, ``homo_lr_1`` predicts on the eval branch with that model, and
    both outputs go to Evaluation. The summaries of ``homo_lr_0`` and
    ``evaluation_0`` are printed as JSON.

    Args:
        config: job config object, or a path handed to ``load_job_config``.
        namespace: suffix appended to the ``experiment`` table namespace.
    """
    # a string is treated as the location of the job config
    job_config = load_job_config(config) if isinstance(config, str) else config
    parties = job_config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    def table_ref():
        # fresh dict per call so the four table references stay distinct objects
        return {"name": "mock_string", "namespace": f"experiment{namespace}"}

    guest_train_data = table_ref()
    host_train_data = table_ref()
    guest_eval_data = table_ref()
    host_eval_data = table_ref()

    # initialize pipeline, then set job initiator and participants
    pipeline = PipeLine()
    pipeline.set_initiator(role='guest', party_id=guest)
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # readers: reader_0 loads the train tables, reader_1 the eval tables
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    reader_1 = Reader(name="reader_1")
    reader_1.get_party_instance(role='guest', party_id=guest).component_param(table=guest_eval_data)
    reader_1.get_party_instance(role='host', party_id=host).component_param(table=host_eval_data)

    # data transform components
    data_transform_0 = DataTransform(
        name="data_transform_0",
        with_label=True,
        output_format="dense",
        label_name='y',
        data_type="str",
    )
    data_transform_1 = DataTransform(name="data_transform_1")

    homo_onehot_0 = HomoOneHotEncoder(
        name='homo_onehot_0',
        transform_col_indexes=-1,
        transform_col_names=[],
        need_alignment=True,
    )
    homo_onehot_1 = HomoOneHotEncoder(name='homo_onehot_1')

    scale_0 = FeatureScale(name='scale_0', method="standard_scale")
    scale_1 = FeatureScale(name='scale_1')

    homo_lr_0 = HomoLR(
        name='homo_lr_0',
        penalty="L2",
        optimizer="sgd",
        tol=1e-05,
        alpha=0.01,
        max_iter=3,
        early_stop="diff",
        batch_size=500,
        learning_rate=0.15,
        decay=1,
        decay_sqrt=True,
        init_param={"init_method": "zeros"},
        encrypt_param={"method": "Paillier"},
        cv_param={
            "n_splits": 4,
            "shuffle": True,
            "random_seed": 33,
            "need_cv": False
        },
    )
    homo_lr_1 = HomoLR(name='homo_lr_1')

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    # each eval-side component takes the model of its train-side counterpart
    pipeline.add_component(data_transform_1,
                           data=Data(data=reader_1.output.data),
                           model=Model(data_transform_0.output.model))

    pipeline.add_component(homo_onehot_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(homo_onehot_1,
                           data=Data(data=data_transform_1.output.data),
                           model=Model(homo_onehot_0.output.model))

    pipeline.add_component(scale_0, data=Data(data=homo_onehot_0.output.data))
    pipeline.add_component(scale_1,
                           data=Data(data=homo_onehot_1.output.data),
                           model=Model(scale_0.output.model))

    pipeline.add_component(homo_lr_0, data=Data(train_data=scale_0.output.data))
    pipeline.add_component(homo_lr_1,
                           data=Data(test_data=scale_1.output.data),
                           model=Model(homo_lr_0.output.model))

    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")
    # evaluation is switched off on the host side
    evaluation_0.get_party_instance(role='host', party_id=host).component_param(need_run=False)
    evaluation_inputs = [homo_lr_0.output.data, homo_lr_1.output.data]
    pipeline.add_component(evaluation_0, data=Data(data=evaluation_inputs))

    pipeline.compile()

    # fit model
    pipeline.fit()

    # query component summaries
    for component_name in ("homo_lr_0", "evaluation_0"):
        summary = pipeline.get_component(component_name).get_summary()
        print(json.dumps(summary, indent=4, ensure_ascii=False))
def make_normal_dsl(config, namespace, selection_param, is_multi_host=False,
                    host_dense_output=True, **kwargs):
    """Assemble a compiled hetero feature selection pipeline.

    The base line is reader -> data transform -> intersection. Depending on
    which optional keys are present in ``kwargs``, extra components are
    attached to the intersected data, and the models of all of them are
    passed to HeteroFeatureSelection as isometric models.

    Args:
        config: job config object exposing ``parties``.
        namespace: suffix appended to the ``experiment`` table namespace.
        selection_param: keyword arguments forwarded to
            ``HeteroFeatureSelection``.
        is_multi_host: use every host party instead of only the first one.
        host_dense_output: accepted for interface compatibility; not used
            by this function.
        **kwargs: optional component parameter dicts, each forwarded as
            keyword arguments to its component: ``binning_param``
            (HeteroFeatureBinning), ``statistic_param`` (DataStatistics),
            ``psi_param`` (PSI, which also adds a second reader -> data
            transform -> intersection branch as validate data),
            ``sbt_param`` (HeteroSecureBoost) and ``fast_sbt_param``
            (HeteroFastSecureBoost).

    Returns:
        The compiled ``PipeLine``.
    """
    parties = config.parties
    guest = parties.guest[0]
    hosts = parties.host if is_multi_host else parties.host[0]

    def table_ref(table_name):
        # fresh dict per call so train and eval tables stay distinct objects
        return {"name": table_name, "namespace": f"experiment{namespace}"}

    guest_train_data = table_ref("breast_hetero_guest")
    host_train_data = table_ref("breast_hetero_host")
    guest_eval_data = table_ref("breast_hetero_guest")
    host_eval_data = table_ref("breast_hetero_host")

    # initialize pipeline, then set job initiator and participants
    pipeline = PipeLine()
    pipeline.set_initiator(role='guest', party_id=guest)
    pipeline.set_roles(guest=guest, host=hosts)

    # reader: one table per role
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=hosts).component_param(table=host_train_data)

    # data transform: only the guest side is configured with a label
    data_transform_0 = DataTransform(name="data_transform_0")
    guest_transform = data_transform_0.get_party_instance(role='guest', party_id=guest)
    guest_transform.component_param(with_label=True, output_format="dense")
    host_transform = data_transform_0.get_party_instance(role='host', party_id=hosts)
    host_transform.component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")

    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))

    # binning and statistics both read from this component
    last_cpn = intersection_0
    # components whose models are handed to feature selection
    selection_include_model = []

    if 'binning_param' in kwargs:
        hetero_feature_binning_0 = HeteroFeatureBinning(**kwargs['binning_param'])
        pipeline.add_component(hetero_feature_binning_0, data=Data(data=last_cpn.output.data))
        selection_include_model.append(hetero_feature_binning_0)

    if 'statistic_param' in kwargs:
        statistic_0 = DataStatistics(**kwargs['statistic_param'])
        pipeline.add_component(statistic_0, data=Data(data=last_cpn.output.data))
        selection_include_model.append(statistic_0)

    if 'psi_param' in kwargs:
        # PSI needs a second branch over the eval tables as validate data
        reader_1 = Reader(name="reader_1")
        reader_1.get_party_instance(role='guest', party_id=guest).component_param(table=guest_eval_data)
        reader_1.get_party_instance(role='host', party_id=hosts).component_param(table=host_eval_data)
        data_transform_1 = DataTransform(name="data_transform_1")
        intersection_1 = Intersection(name="intersection_1")

        pipeline.add_component(reader_1)
        pipeline.add_component(data_transform_1,
                               data=Data(data=reader_1.output.data),
                               model=Model(data_transform_0.output.model))
        pipeline.add_component(intersection_1, data=Data(data=data_transform_1.output.data))

        psi_0 = PSI(**kwargs['psi_param'])
        psi_input = Data(train_data=intersection_0.output.data,
                         validate_data=intersection_1.output.data)
        pipeline.add_component(psi_0, data=psi_input)
        selection_include_model.append(psi_0)

    # both boosting variants train directly on the intersected data
    for param_key, boost_cls in (('sbt_param', HeteroSecureBoost),
                                 ('fast_sbt_param', HeteroFastSecureBoost)):
        if param_key in kwargs:
            boost_cpn = boost_cls(**kwargs[param_key])
            pipeline.add_component(boost_cpn, data=Data(train_data=intersection_0.output.data))
            selection_include_model.append(boost_cpn)

    hetero_feature_selection_0 = HeteroFeatureSelection(**selection_param)
    selection_data = Data(data=intersection_0.output.data)
    isometric_models = [cpn.output.model for cpn in selection_include_model]
    pipeline.add_component(hetero_feature_selection_0,
                           data=selection_data,
                           model=Model(isometric_model=isometric_models))

    # compile pipeline once finished adding modules
    pipeline.compile()
    return pipeline
def make_single_predict_pipeline(config, namespace, selection_param, is_multi_host=False, **kwargs):
    """Assemble a compiled binning -> selection -> scale pipeline with a mirrored eval line.

    Train line: reader -> data transform -> intersection -> federated
    sample -> feature binning -> feature selection -> feature scale.
    Eval line: reader -> data transform -> intersection -> feature binning
    -> feature selection -> feature scale, where each eval component with a
    train counterpart takes that counterpart's model.

    Args:
        config: job config object exposing ``parties``.
        namespace: suffix appended to the ``experiment`` table namespace.
        selection_param: keyword arguments forwarded to the train-side
            ``HeteroFeatureSelection``.
        is_multi_host: use every host party instead of only the first one.
        **kwargs: must contain ``binning_param``, forwarded as keyword
            arguments to the train-side ``HeteroFeatureBinning``.

    Returns:
        The compiled ``PipeLine``.

    Raises:
        ValueError: if ``binning_param`` is not supplied in ``kwargs``.
    """
    parties = config.parties
    guest = parties.guest[0]
    hosts = parties.host if is_multi_host else parties.host[0]

    def table_ref(table_name):
        # fresh dict per call so train and eval tables stay distinct objects
        return {"name": table_name, "namespace": f"experiment{namespace}"}

    guest_train_data = table_ref("breast_hetero_guest")
    host_train_data = table_ref("breast_hetero_host")
    guest_eval_data = table_ref("breast_hetero_guest")
    host_eval_data = table_ref("breast_hetero_host")

    # initialize pipeline, then set job initiator and participants
    pipeline = PipeLine()
    pipeline.set_initiator(role='guest', party_id=guest)
    pipeline.set_roles(guest=guest, host=hosts)

    # train line: reader -> data transform -> intersection
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=hosts).component_param(table=host_train_data)

    # only the guest side is configured with a label
    data_transform_0 = DataTransform(name="data_transform_0")
    guest_transform = data_transform_0.get_party_instance(role='guest', party_id=guest)
    guest_transform.component_param(with_label=True, output_format="dense")
    host_transform = data_transform_0.get_party_instance(role='host', party_id=hosts)
    host_transform.component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")

    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))

    # eval line: reader -> data transform (train-side model) -> intersection
    reader_1 = Reader(name="reader_1")
    reader_1.get_party_instance(role='guest', party_id=guest).component_param(table=guest_eval_data)
    reader_1.get_party_instance(role='host', party_id=hosts).component_param(table=host_eval_data)
    data_transform_1 = DataTransform(name="data_transform_1")
    intersection_1 = Intersection(name="intersection_1")

    pipeline.add_component(reader_1)
    pipeline.add_component(data_transform_1,
                           data=Data(data=reader_1.output.data),
                           model=Model(data_transform_0.output.model))
    pipeline.add_component(intersection_1, data=Data(data=data_transform_1.output.data))

    # only the train line is sampled
    sample_0 = FederatedSample(name='sample_0', fractions=0.9)
    pipeline.add_component(sample_0, data=Data(data=intersection_0.output.data))

    if "binning_param" not in kwargs:
        raise ValueError("Binning_param is needed")

    hetero_feature_binning_0 = HeteroFeatureBinning(**kwargs['binning_param'])
    pipeline.add_component(hetero_feature_binning_0, data=Data(data=sample_0.output.data))

    hetero_feature_binning_1 = HeteroFeatureBinning(name='hetero_feature_binning_1')
    pipeline.add_component(hetero_feature_binning_1,
                           data=Data(data=intersection_1.output.data),
                           model=Model(hetero_feature_binning_0.output.model))

    # train-side selection receives the binning model as an isometric model
    hetero_feature_selection_0 = HeteroFeatureSelection(**selection_param)
    pipeline.add_component(hetero_feature_selection_0,
                           data=Data(data=hetero_feature_binning_0.output.data),
                           model=Model(isometric_model=[hetero_feature_binning_0.output.model]))

    hetero_feature_selection_1 = HeteroFeatureSelection(name='hetero_feature_selection_1')
    pipeline.add_component(hetero_feature_selection_1,
                           data=Data(data=hetero_feature_binning_1.output.data),
                           model=Model(hetero_feature_selection_0.output.model))

    scale_0 = FeatureScale(name='scale_0')
    scale_1 = FeatureScale(name='scale_1')
    pipeline.add_component(scale_0, data=Data(data=hetero_feature_selection_0.output.data))
    pipeline.add_component(scale_1,
                           data=Data(data=hetero_feature_selection_1.output.data),
                           model=Model(scale_0.output.model))

    pipeline.compile()
    return pipeline
def main(config="../../config.yaml", namespace=""):
    """Fit a multi-class hetero SecureBoost job using the ``mix`` boosting strategy.

    Args:
        config: Path to a job config file, or an already loaded config object
            exposing ``parties.guest`` and ``parties.host``.
        namespace: Suffix appended to ``"experiment"`` to form the table namespace.
    """
    if isinstance(config, str):
        config = load_job_config(config)
    party_cfg = config.parties
    guest = party_cfg.guest[0]
    host = party_cfg.host[0]

    # The validation tables have the same names as the training tables.
    guest_train_data = {"name": "vehicle_scale_hetero_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "vehicle_scale_hetero_host", "namespace": f"experiment{namespace}"}
    guest_validate_data = {"name": "vehicle_scale_hetero_guest", "namespace": f"experiment{namespace}"}
    host_validate_data = {"name": "vehicle_scale_hetero_host", "namespace": f"experiment{namespace}"}

    # Guest initiates the job; there is no arbiter role in this job.
    pipeline = PipeLine()
    pipeline.set_initiator(role="guest", party_id=guest)
    pipeline.set_roles(guest=guest, host=host)

    # Readers: *_0 feeds training, *_1 feeds validation.
    reader_0 = Reader(name="reader_0")
    reader_1 = Reader(name="reader_1")
    table_plan = (
        (reader_0, guest_train_data, host_train_data),
        (reader_1, guest_validate_data, host_validate_data),
    )
    for reader, guest_table, host_table in table_plan:
        reader.get_party_instance(role="guest", party_id=guest).component_param(table=guest_table)
        reader.get_party_instance(role="host", party_id=host).component_param(table=host_table)

    # Data transforms: only the guest side is configured as labeled.
    data_transform_0 = DataTransform(name="data_transform_0")
    data_transform_1 = DataTransform(name="data_transform_1")
    for transform in (data_transform_0, data_transform_1):
        transform.get_party_instance(role="guest", party_id=guest).component_param(
            with_label=True, output_format="dense")
        transform.get_party_instance(role="host", party_id=host).component_param(with_label=False)

    intersect_0 = Intersection(name="intersection_0")
    intersect_1 = Intersection(name="intersection_1")

    hetero_secure_boost_0 = HeteroSecureBoost(
        name="hetero_secure_boost_0",
        num_trees=3,
        task_type="classification",
        objective_param={"objective": "cross_entropy"},
        encrypt_param={"method": "Paillier"},
        tree_param={"max_depth": 3},
        validation_freqs=1,
        boosting_strategy='mix',
    )

    evaluation_0 = Evaluation(name="evaluation_0", eval_type="multi")

    # Wire the components in execution order.
    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(data_transform_1,
                           data=Data(data=reader_1.output.data),
                           model=Model(data_transform_0.output.model))
    pipeline.add_component(intersect_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(intersect_1, data=Data(data=data_transform_1.output.data))
    pipeline.add_component(hetero_secure_boost_0,
                           data=Data(train_data=intersect_0.output.data,
                                     validate_data=intersect_1.output.data))
    pipeline.add_component(evaluation_0, data=Data(data=hetero_secure_boost_0.output.data))

    pipeline.compile()
    pipeline.fit()

    print("fitting hetero secureboost done, result:")
    print(pipeline.get_component("hetero_secure_boost_0").get_summary())
def main(config="../../config.yaml", namespace=""):
    """Fit a hetero k-means job preceded by feature binning and feature selection.

    Args:
        config: Path to a job config file, or an already loaded config object
            exposing ``parties.guest``, ``parties.host`` and ``parties.arbiter``.
        namespace: Suffix appended to ``"experiment"`` to form the table namespace.
    """
    if isinstance(config, str):
        config = load_job_config(config)
    party_cfg = config.parties
    guest = party_cfg.guest[0]
    host = party_cfg.host[0]
    arbiter = party_cfg.arbiter[0]

    guest_train_data = {"name": "breast_hetero_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "breast_hetero_host", "namespace": f"experiment{namespace}"}

    # Guest initiates the job; guest, host and arbiter all participate.
    pipeline = PipeLine().set_initiator(role='guest', party_id=guest)
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # Reader: one table per party.
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    # Data transform: only the guest side is configured as labeled.
    data_transform_0 = DataTransform(name="data_transform_0")
    guest_transform = data_transform_0.get_party_instance(role='guest', party_id=guest)
    guest_transform.component_param(with_label=True, output_format="dense")
    host_transform = data_transform_0.get_party_instance(role='host', party_id=host)
    host_transform.component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")

    hetero_feature_binning_0 = HeteroFeatureBinning(
        name='hetero_feature_binning_0',
        method='optimal',
        optimal_binning_param={"metric_method": "iv"},
        bin_indexes=-1,
    )

    hetero_feature_selection_0 = HeteroFeatureSelection(
        name='hetero_feature_selection_0',
        filter_methods=["manually", "iv_filter"],
        manually_param={"filter_out_indexes": [1]},
        iv_param={
            "metrics": ["iv", "iv"],
            "filter_type": ["top_k", "threshold"],
            "take_high": [True, True],
            "threshold": [10, 0.001],
        },
        select_col_indexes=-1,
    )

    hetero_kmeans_0 = HeteroKmeans(name='hetero_kmeans_0', k=3, max_iter=10)
    evaluation_0 = Evaluation(name='evaluation_0', eval_type='clustering')

    # Add components in execution order.
    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))
    # Binning and selection both read the intersection output; selection also
    # receives the binning model as its isometric model.
    pipeline.add_component(hetero_feature_binning_0, data=Data(data=intersection_0.output.data))
    pipeline.add_component(hetero_feature_selection_0,
                           data=Data(data=intersection_0.output.data),
                           model=Model(isometric_model=hetero_feature_binning_0.output.model))
    pipeline.add_component(hetero_kmeans_0,
                           data=Data(train_data=hetero_feature_selection_0.output.data))
    print(f"data: {hetero_kmeans_0.output.data.data[0]}")
    # Evaluation reads only the first entry of the k-means data output.
    pipeline.add_component(evaluation_0, data=Data(data=hetero_kmeans_0.output.data.data[0]))

    # Compile once every component has been added, then fit.
    pipeline.compile()
    pipeline.fit()

    # Print the summary of the k-means component.
    print(pipeline.get_component("hetero_kmeans_0").get_summary())
def main(config="../../config.yaml", namespace=""):
    """Fit a hetero linear regression job with a validation branch.

    Args:
        config: Path to a job config file, or an already loaded config object
            exposing ``parties``, ``backend`` and ``work_mode``.
        namespace: Suffix appended to ``"experiment"`` to form the table namespace.
    """
    if isinstance(config, str):
        config = load_job_config(config)
    party_cfg = config.parties
    guest = party_cfg.guest[0]
    host = party_cfg.host[0]
    arbiter = party_cfg.arbiter[0]
    backend = config.backend
    work_mode = config.work_mode

    # Index 0 feeds training and index 1 feeds validation; both name the same table.
    guest_train_data = [
        {"name": "motor_hetero_guest", "namespace": f"experiment{namespace}"},
        {"name": "motor_hetero_guest", "namespace": f"experiment{namespace}"},
    ]
    host_train_data = [
        {"name": "motor_hetero_host", "namespace": f"experiment{namespace}"},
        {"name": "motor_hetero_host", "namespace": f"experiment{namespace}"},
    ]

    pipeline = PipeLine()
    pipeline.set_initiator(role='guest', party_id=guest)
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # Readers, one per branch.
    reader_0 = Reader(name="reader_0")
    reader_1 = Reader(name="reader_1")
    for idx, reader in enumerate((reader_0, reader_1)):
        reader.get_party_instance(role='guest', party_id=guest).algorithm_param(
            table=guest_train_data[idx])
        reader.get_party_instance(role='host', party_id=host).algorithm_param(
            table=host_train_data[idx])

    # DataIO: the guest side is labeled with the float column "motor_speed".
    dataio_0 = DataIO(name="dataio_0")
    dataio_1 = DataIO(name="dataio_1")
    for dataio in (dataio_0, dataio_1):
        dataio.get_party_instance(role='guest', party_id=guest).algorithm_param(
            with_label=True,
            label_name="motor_speed",
            label_type="float",
            output_format="dense")
        dataio.get_party_instance(role='host', party_id=host).algorithm_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")
    intersect_1 = Intersection(name="intersection_1")

    hetero_linr_0 = HeteroLinR(
        name="hetero_linr_0",
        penalty="L2",
        optimizer="sgd",
        tol=0.001,
        alpha=0.01,
        max_iter=20,
        early_stop="weight_diff",
        batch_size=-1,
        learning_rate=0.15,
        decay=0.0,
        decay_sqrt=False,
        init_param={"init_method": "zeros"},
        encrypted_mode_calculator_param={"mode": "fast"},
        validation_freqs=1,
        early_stopping_rounds=5,
        metrics=["mean_absolute_error", "root_mean_squared_error"],
        use_first_metric_only=False,
    )

    # Wire the components in execution order.
    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(dataio_1,
                           data=Data(data=reader_1.output.data),
                           model=Model(dataio_0.output.model))
    pipeline.add_component(intersection_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(intersect_1, data=Data(data=dataio_1.output.data))
    pipeline.add_component(hetero_linr_0,
                           data=Data(train_data=intersection_0.output.data,
                                     validate_data=intersect_1.output.data))

    pipeline.compile()
    pipeline.fit(backend=backend, work_mode=work_mode)
def main(config="../../config.yaml", namespace=""):
    """Fit a hetero LR job behind a chain of feature-engineering components.

    Components wired here: reader, data transform (with match id), intersection,
    feature scale, feature binning, data statistics, Pearson correlation,
    feature selection, one-hot encoding, hetero LR and evaluation.

    Args:
        config: Path to a job config file, or an already loaded config object
            exposing ``parties.guest``, ``parties.host`` and ``parties.arbiter``.
        namespace: Suffix appended to ``"experiment_sid"`` to form the table
            namespace.
    """
    if isinstance(config, str):
        config = load_job_config(config)
    party_cfg = config.parties
    guest = party_cfg.guest[0]
    host = party_cfg.host[0]
    arbiter = party_cfg.arbiter[0]

    guest_train_data = {"name": "breast_hetero_guest", "namespace": f"experiment_sid{namespace}"}
    host_train_data = {"name": "breast_hetero_host", "namespace": f"experiment_sid{namespace}"}

    pipeline = PipeLine()
    pipeline.set_initiator(role='guest', party_id=guest)
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    # Only the guest side is configured as labeled.
    data_transform_0 = DataTransform(name="data_transform_0", with_match_id=True)
    data_transform_0.get_party_instance(role='guest', party_id=guest).component_param(with_label=True)
    data_transform_0.get_party_instance(role='host', party_id=host).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")

    feature_scale_0 = FeatureScale(name='feature_scale_0', method="standard_scale", need_run=True)

    hetero_feature_binning_0 = HeteroFeatureBinning(
        name='hetero_feature_binning_0',
        method="quantile",
        compress_thres=10000,
        head_size=10000,
        error=0.001,
        bin_num=10,
        bin_indexes=-1,
        adjustment_factor=0.5,
        local_only=False,
        need_run=True,
        transform_param={"transform_cols": -1, "transform_type": "bin_num"},
    )

    statistic_0 = DataStatistics(name='statistic_0', statistics=["95%"])
    pearson_0 = HeteroPearson(name='pearson_0', column_indexes=-1)
    onehot_0 = OneHotEncoder(name='onehot_0')

    hetero_feature_selection_0 = HeteroFeatureSelection(
        name="hetero_feature_selection_0",
        select_col_indexes=-1,
        select_names=[],
        filter_methods=[
            "manually",
            "unique_value",
            "iv_filter",
            "coefficient_of_variation_value_thres",
            "outlier_cols",
        ],
        manually_param={"filter_out_indexes": [0, 1, 2], "filter_out_names": ["x3"]},
        unique_param={"eps": 1e-06},
        iv_param={
            "metrics": ["iv", "iv", "iv"],
            "filter_type": ["threshold", "top_k", "top_percentile"],
            "threshold": [0.001, 100, 0.99],
        },
        variance_coe_param={"value_threshold": 0.3},
        outlier_param={"percentile": 0.95, "upper_threshold": 2.0},
    )

    hetero_lr_0 = HeteroLR(
        name="hetero_lr_0",
        penalty="L2",
        optimizer="rmsprop",
        tol=0.0001,
        alpha=0.01,
        max_iter=30,
        early_stop="diff",
        batch_size=320,
        learning_rate=0.15,
        init_param={"init_method": "zeros"},
        sqn_param={
            "update_interval_L": 3,
            "memory_M": 5,
            "sample_size": 5000,
            "random_seed": None,
        },
        cv_param={"n_splits": 5, "shuffle": False, "random_seed": 103, "need_cv": False},
    )

    evaluation_0 = Evaluation(name='evaluation_0')

    # Wire the components in execution order.
    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(feature_scale_0, data=Data(data=intersection_0.output.data))
    # Binning, statistics and Pearson all read the scaled data.
    scaled = feature_scale_0.output.data
    pipeline.add_component(hetero_feature_binning_0, data=Data(data=scaled))
    pipeline.add_component(statistic_0, data=Data(data=scaled))
    pipeline.add_component(pearson_0, data=Data(data=scaled))
    # Selection receives the binning and statistics models as isometric models.
    pipeline.add_component(
        hetero_feature_selection_0,
        data=Data(data=hetero_feature_binning_0.output.data),
        model=Model(isometric_model=[hetero_feature_binning_0.output.model,
                                     statistic_0.output.model]))
    pipeline.add_component(onehot_0, data=Data(data=hetero_feature_selection_0.output.data))
    pipeline.add_component(hetero_lr_0, data=Data(train_data=onehot_0.output.data))
    pipeline.add_component(evaluation_0, data=Data(data=hetero_lr_0.output.data))

    pipeline.compile()
    pipeline.fit()
def main(config="../../config.yaml", param="./lr_config.yaml", namespace=""):
    """Run the hetero LR benchmark job and return its data and metric summaries.

    Args:
        config: Path to a job config file, or an already loaded config object
            exposing ``parties.guest``, ``parties.host`` and ``parties.arbiter``.
        param: Path to a parameter file, or an already loaded dict. Keys read
            here: ``data_guest``, ``penalty``, ``max_iter``, ``alpha``,
            ``learning_rate``, ``optimizer``, ``batch_size`` (required) and
            ``floating_point_precision``, ``init_method``, ``random_seed``
            (optional).
        namespace: Suffix appended to ``"experiment"`` to form the table namespace.

    Returns:
        A ``(data_summary, result_summary)`` tuple. ``data_summary`` names the
        guest and host tables under ``"train"`` and ``"test"``;
        ``result_summary`` is the parsed evaluation summary with an extra
        ``"distribution_metrics"`` entry.

    Raises:
        ValueError: If the file name in ``param["data_guest"]`` is not one of
            the recognised data sets.
    """
    if isinstance(config, str):
        config = load_job_config(config)
    party_cfg = config.parties
    guest = party_cfg.guest[0]
    host = party_cfg.host[0]
    arbiter = party_cfg.arbiter[0]

    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    assert isinstance(param, dict)

    # Resolve the guest/host table names from the guest data file name.
    known_tables = {
        "default_credit_hetero_guest.csv": ('default_credit_hetero_guest', 'default_credit_hetero_host'),
        'breast_hetero_guest.csv': ('breast_hetero_guest', 'breast_hetero_host'),
        'give_credit_hetero_guest.csv': ('give_credit_hetero_guest', 'give_credit_hetero_host'),
        'epsilon_5k_hetero_guest.csv': ('epsilon_5k_hetero_guest', 'epsilon_5k_hetero_host'),
    }
    data_set = param.get("data_guest").split('/')[-1]
    if data_set not in known_tables:
        raise ValueError(f"Cannot recognized data_set: {data_set}")
    guest_data_table, host_data_table = known_tables[data_set]

    guest_train_data = {"name": guest_data_table, "namespace": f"experiment{namespace}"}
    host_train_data = {"name": host_data_table, "namespace": f"experiment{namespace}"}

    # Guest initiates the job; guest, host and arbiter all participate.
    pipeline = PipeLine().set_initiator(role='guest', party_id=guest)
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    # Only the guest side is configured as labeled.
    data_transform_0 = DataTransform(name="data_transform_0")
    guest_transform = data_transform_0.get_party_instance(role='guest', party_id=guest)
    guest_transform.component_param(with_label=True, output_format="dense")
    data_transform_0.get_party_instance(role='host', party_id=host).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")

    lr_param = {
        "penalty": param["penalty"],
        "max_iter": param["max_iter"],
        "alpha": param["alpha"],
        "learning_rate": param["learning_rate"],
        "optimizer": param["optimizer"],
        "batch_size": param["batch_size"],
        "shuffle": False,
        "masked_rate": 0,
        "early_stop": "diff",
        "tol": 1e-5,
        "floating_point_precision": param.get("floating_point_precision"),
        "init_param": {
            "init_method": param.get("init_method", 'random_uniform'),
            "random_seed": param.get("random_seed", 103),
        },
    }
    print(f"lr_param: {lr_param}, data_set: {data_set}")

    # hetero_lr_0 is trained; hetero_lr_1 takes its model and the same data as test data.
    hetero_lr_0 = HeteroLR(name='hetero_lr_0', **lr_param)
    hetero_lr_1 = HeteroLR(name='hetero_lr_1')

    evaluation_0 = Evaluation(name='evaluation_0', eval_type="binary")

    # Add components in execution order.
    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(hetero_lr_0, data=Data(train_data=intersection_0.output.data))
    pipeline.add_component(hetero_lr_1,
                           data=Data(test_data=intersection_0.output.data),
                           model=Model(hetero_lr_0.output.model))
    pipeline.add_component(evaluation_0, data=Data(data=hetero_lr_0.output.data))

    pipeline.compile()

    job_parameters = JobParameters()
    pipeline.fit(job_parameters)

    # Collect the outputs of both LR components.
    lr_0_data = pipeline.get_component("hetero_lr_0").get_output_data().get("data")
    lr_1_data = pipeline.get_component("hetero_lr_1").get_output_data().get("data")

    lr_0_score = extract_data(lr_0_data, "predict_result")
    lr_0_label = extract_data(lr_0_data, "label")
    lr_1_score = extract_data(lr_1_data, "predict_result")
    lr_1_label = extract_data(lr_1_data, "label")
    lr_0_score_label = extract_data(lr_0_data, "predict_result", keep_id=True)
    lr_1_score_label = extract_data(lr_1_data, "predict_result", keep_id=True)

    result_summary = parse_summary_result(pipeline.get_component("evaluation_0").get_summary())

    # Metrics comparing the outputs of the two LR components.
    score_diversity_ratio = classification_metric.Distribution.compute(lr_0_score_label, lr_1_score_label)
    ks_2samp = classification_metric.KSTest.compute(lr_0_score, lr_1_score)
    map_d_value = classification_metric.AveragePrecisionScore().compute(
        lr_0_score, lr_1_score, lr_0_label, lr_1_label)
    metric_lr = {
        "score_diversity_ratio": score_diversity_ratio,
        "ks_2samp": ks_2samp,
        "mAP_D_value": map_d_value,
    }
    result_summary["distribution_metrics"] = {"hetero_lr": metric_lr}

    # The "test" entry names the same tables as "train".
    data_summary = {
        "train": {"guest": guest_train_data["name"], "host": host_train_data["name"]},
        "test": {"guest": guest_train_data["name"], "host": host_train_data["name"]},
    }
    return data_summary, result_summary
def make_normal_dsl(config, namespace, lr_param, is_multi_host=False, has_validate=False,
                    is_cv=False, is_ovr=False, is_dense=True, need_evaluation=True):
    """Assemble and compile a hetero LR pipeline, optionally with a validation branch.

    Args:
        config: Job config object exposing ``parties.guest``, ``parties.host``
            and ``parties.arbiter``.
        namespace: Suffix appended to ``"experiment"`` to form the table namespace.
        lr_param: Keyword arguments for the trained ``HeteroLR`` component.
        is_multi_host: When true, all entries of ``parties.host`` take part;
            otherwise only the first entry does.
        has_validate: When true, add a second reader plus ``*_1`` copies of the
            preprocessing components, feed their output to the LR as validation
            data, and add a predicting ``hetero_lr_1``.
        is_cv: When true, compile and return right after adding the trained LR,
            skipping the predict and evaluation components.
        is_ovr: When true, use the ``vehicle_scale_hetero_*`` tables instead of
            the ``breast_hetero_*`` ones.
        is_dense: Selects ``'dense'`` versus ``'sparse'`` as the data transform
            output format.
        need_evaluation: When true, append a binary ``Evaluation`` component.

    Returns:
        The compiled ``PipeLine``. It is not fitted here.
    """
    party_cfg = config.parties
    guest = party_cfg.guest[0]
    hosts = party_cfg.host if is_multi_host else party_cfg.host[0]
    arbiter = party_cfg.arbiter[0]

    guest_table, host_table = (
        ("vehicle_scale_hetero_guest", "vehicle_scale_hetero_host")
        if is_ovr
        else ("breast_hetero_guest", "breast_hetero_host")
    )
    # The evaluation tables have the same names as the training tables.
    guest_train_data = {"name": guest_table, "namespace": f"experiment{namespace}"}
    host_train_data = {"name": host_table, "namespace": f"experiment{namespace}"}
    guest_eval_data = {"name": guest_table, "namespace": f"experiment{namespace}"}
    host_eval_data = {"name": host_table, "namespace": f"experiment{namespace}"}

    # Guest initiates the job; guest, host(s) and arbiter all participate.
    pipeline = PipeLine().set_initiator(role='guest', party_id=guest)
    pipeline.set_roles(guest=guest, host=hosts, arbiter=arbiter)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=hosts).component_param(table=host_train_data)

    data_transform_0 = DataTransform(name="data_transform_0",
                                     output_format='dense' if is_dense else 'sparse')
    # Only the guest side is configured as labeled.
    guest_transform = data_transform_0.get_party_instance(role='guest', party_id=guest)
    guest_transform.component_param(with_label=True)
    data_transform_0.get_party_instance(role='host', party_id=hosts).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")

    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))

    # Preprocessing components that get a "*_1" copy in the validation branch.
    train_line = [data_transform_0, intersection_0]

    last_cpn = None
    if has_validate:
        reader_1 = Reader(name="reader_1")
        reader_1.get_party_instance(role='guest', party_id=guest).component_param(table=guest_eval_data)
        reader_1.get_party_instance(role='host', party_id=hosts).component_param(table=host_eval_data)
        pipeline.add_component(reader_1)
        last_cpn = reader_1

        for cpn in train_line:
            # Swap the trailing index of the name for "1" (e.g. "x_0" -> "x_1").
            new_name = "_".join([*cpn.name.split('_')[:-1], '1'])
            validate_cpn = type(cpn)(name=new_name)
            links = {"data": Data(data=last_cpn.output.data)}
            if hasattr(cpn.output, "model"):
                # Pass on the training component's model when it has one.
                links["model"] = Model(cpn.output.model)
            pipeline.add_component(validate_cpn, **links)
            last_cpn = validate_cpn

    hetero_lr_0 = HeteroLR(**lr_param)
    lr_inputs = {"train_data": intersection_0.output.data}
    if has_validate:
        lr_inputs["validate_data"] = last_cpn.output.data
    pipeline.add_component(hetero_lr_0, data=Data(**lr_inputs))

    if not is_cv:
        evaluation_data = [hetero_lr_0.output.data]
        if has_validate:
            hetero_lr_1 = HeteroLR(name='hetero_lr_1')
            pipeline.add_component(hetero_lr_1,
                                   data=Data(test_data=last_cpn.output.data),
                                   model=Model(hetero_lr_0.output.model))
            evaluation_data.append(hetero_lr_1.output.data)

        if need_evaluation:
            evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")
            pipeline.add_component(evaluation_0, data=Data(data=evaluation_data))

    pipeline.compile()
    return pipeline
def main(config="../../config.yaml", namespace=""):
    """Fit a hetero LR job that takes its input model from a ``ModelLoader`` component.

    Args:
        config: Path to a job config file, or an already loaded config object
            exposing ``parties.guest``, ``parties.host`` and ``parties.arbiter``.
        namespace: Suffix appended to ``"experiment"`` to form the table namespace.

    Returns:
        The fitted ``PipeLine``.
    """
    if isinstance(config, str):
        config = load_job_config(config)
    party_cfg = config.parties
    guest = party_cfg.guest[0]
    hosts = party_cfg.host[0]
    arbiter = party_cfg.arbiter[0]

    guest_train_data = {"name": "breast_hetero_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "breast_hetero_host", "namespace": f"experiment{namespace}"}
    # Alternative tables, left disabled:
    # guest_train_data = {"name": "default_credit_hetero_guest", "namespace": f"experiment{namespace}"}
    # host_train_data = {"name": "default_credit_hetero_host", "namespace": f"experiment{namespace}"}

    # Guest initiates the job; guest, host and arbiter all participate.
    pipeline = PipeLine().set_initiator(role='guest', party_id=guest)
    pipeline.set_roles(guest=guest, host=hosts, arbiter=arbiter)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=hosts).component_param(table=host_train_data)

    # Only the guest side is configured as labeled.
    data_transform_0 = DataTransform(name="data_transform_0", output_format='dense')
    guest_transform = data_transform_0.get_party_instance(role='guest', party_id=guest)
    guest_transform.component_param(with_label=True)
    data_transform_0.get_party_instance(role='host', party_id=hosts).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")

    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))

    # The loader points at a hard-coded model id/version, component and step index.
    model_loader_0 = ModelLoader(
        name="model_loader_0",
        model_id="arbiter-9999#guest-10000#host-9999#model",
        model_version="202108311438379703480",
        component_name="hetero_lr_0",
        step_index=2,
    )

    lr_param = {
        "penalty": "L2",
        "optimizer": "rmsprop",
        "tol": 0.0001,
        "alpha": 0.01,
        "early_stop": "diff",
        "batch_size": -1,
        "learning_rate": 0.15,
        "init_param": {"init_method": "zeros", "fit_intercept": True},
        "encrypt_param": {"key_length": 1024},
        "callback_param": {
            "callbacks": ["ModelCheckpoint"],
            "validation_freqs": 1,
            "early_stopping_rounds": 1,
            "metrics": None,
            "use_first_metric_only": False,
            "save_freq": 1,
        },
    }
    hetero_lr_0 = HeteroLR(name="hetero_lr_0", max_iter=30, **lr_param)

    pipeline.add_component(model_loader_0)
    # The LR reads the intersected data and the loaded model.
    pipeline.add_component(hetero_lr_0,
                           data=Data(train_data=intersection_0.output.data),
                           model=Model(model=model_loader_0.output.model))

    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")
    pipeline.add_component(evaluation_0, data=Data(data=hetero_lr_0.output.data))

    pipeline.compile()
    pipeline.fit()

    # Print the component summaries.
    for component_name in ("hetero_lr_0", "evaluation_0"):
        prettify(pipeline.get_component(component_name).get_summary())

    return pipeline
def main(config="../../config.yaml", namespace=""):
    """Fit hetero feature binning with category columns, then run a predict job.

    A training pipeline fits ``hetero_feature_binning_0`` on the intersected
    training data and applies its model to a second branch. Afterwards the data
    transform, intersection and binning components are deployed and replayed in
    a separate predict pipeline fed by ``reader_1``.

    Args:
        config: Path to a job config file, or an already loaded config object
            exposing ``parties.guest`` and ``parties.host``.
        namespace: Suffix appended to ``"experiment"`` to form the table namespace.
    """
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    # The whole host list is passed on, not just its first entry.
    host = parties.host

    # The validation tables have the same names as the training tables.
    guest_train_data = {"name": "vehicle_scale_hetero_guest", "namespace": f"experiment{namespace}"}
    guest_validate_data = {"name": "vehicle_scale_hetero_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "vehicle_scale_hetero_host", "namespace": f"experiment{namespace}"}
    host_validate_data = {"name": "vehicle_scale_hetero_host", "namespace": f"experiment{namespace}"}

    pipeline = PipeLine().set_initiator(role='guest', party_id=guest).set_roles(guest=guest, host=host)

    data_transform_0, data_transform_1 = DataTransform(name="data_transform_0"), DataTransform(name='data_transform_1')
    reader_0, reader_1 = Reader(name="reader_0"), Reader(name='reader_1')

    # Training branch.
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)
    data_transform_0.get_party_instance(role='guest', party_id=guest).component_param(
        with_label=True, output_format="dense")
    data_transform_0.get_party_instance(role='host', party_id=host).component_param(
        with_label=False, output_format="dense")

    # Validation branch.
    reader_1.get_party_instance(role='guest', party_id=guest).component_param(table=guest_validate_data)
    reader_1.get_party_instance(role='host', party_id=host).component_param(table=host_validate_data)
    data_transform_1.get_party_instance(role='guest', party_id=guest).component_param(
        with_label=True, output_format="dense")
    # Fix: the host side reads the same host table that data_transform_0 treats
    # as unlabeled, so it must not be declared labeled here either.
    data_transform_1.get_party_instance(role='host', party_id=host).component_param(
        with_label=False, output_format="dense")

    intersection_0 = Intersection(name="intersection_0")
    intersection_1 = Intersection(name="intersection_1")

    param = {
        "method": "quantile",
        "optimal_binning_param": {
            "metric_method": "gini",
            "min_bin_pct": 0.05,
            "max_bin_pct": 0.8,
            "init_bucket_method": "quantile",
            "init_bin_nums": 100,
            "mixture": True
        },
        "compress_thres": 10000,
        "head_size": 10000,
        "error": 0.001,
        "bin_num": 10,
        "bin_indexes": -1,
        "bin_names": None,
        "category_indexes": [0, 1, 2],
        "category_names": None,
        "adjustment_factor": 0.5,
        "local_only": False,
        "transform_param": {
            "transform_cols": -1,
            "transform_names": None,
            "transform_type": "bin_num"
        }
    }

    hetero_feature_binning_0 = HeteroFeatureBinning(name="hetero_feature_binning_0", **param)
    hetero_feature_binning_1 = HeteroFeatureBinning(name='hetero_feature_binning_1')

    # Wire the components; every *_1 component takes the model of its *_0 counterpart.
    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(reader_1)
    pipeline.add_component(data_transform_1,
                           data=Data(data=reader_1.output.data),
                           model=Model(data_transform_0.output.model))
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(intersection_1, data=Data(data=data_transform_1.output.data))
    pipeline.add_component(hetero_feature_binning_0, data=Data(data=intersection_0.output.data))
    pipeline.add_component(hetero_feature_binning_1,
                           data=Data(data=intersection_1.output.data),
                           model=Model(hetero_feature_binning_0.output.model))

    pipeline.compile()
    pipeline.fit()

    # Predict: deploy the components needed for prediction.
    pipeline.deploy_component([data_transform_0, intersection_0, hetero_feature_binning_0])

    predict_pipeline = PipeLine()
    # The predict pipeline reads its data through reader_1.
    predict_pipeline.add_component(reader_1)
    # Add the deployed training pipeline and map reader_1's output onto the
    # input of its data_transform_0.
    predict_pipeline.add_component(
        pipeline,
        data=Data(predict_input={pipeline.data_transform_0.input.data: reader_1.output.data}))
    predict_pipeline.predict()
def main(config="../../config.yaml", namespace=""):
    """Fit a homo pipeline that trains LR, a local baseline and SecureBoost side by side.

    The ``*_0`` branch is fitted on sampled, binned and one-hot encoded data;
    the ``*_1`` branch runs the same stages on a second reader using the models
    of its ``*_0`` counterparts. Two evaluation components collect the outputs.

    Args:
        config: Path to a job config file, or an already loaded config object
            exposing ``parties.guest``, ``parties.host`` and ``parties.arbiter``.
        namespace: Suffix appended to ``"experiment"`` to form the table namespace.
    """
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    guest_train_data = {"name": "breast_homo_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "breast_homo_host", "namespace": f"experiment{namespace}"}

    pipeline = PipeLine().set_initiator(role='guest', party_id=guest).set_roles(
        guest=guest, host=host, arbiter=arbiter)

    # Both readers are pointed at the same tables.
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    reader_1 = Reader(name="reader_1")
    reader_1.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_1.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    data_transform_0 = DataTransform(name="data_transform_0", with_label=True)
    data_transform_1 = DataTransform(name="data_transform_1")

    # Fix: task_type was the garbled literal "h**o"; this is a homo pipeline,
    # so the intended value is "homo".
    federated_sample_0 = FederatedSample(name="federated_sample_0", mode="stratified", method="downsample",
                                         fractions=[[0, 1.0], [1, 1.0]], task_type="homo")

    homo_binning_0 = HomoFeatureBinning(name='homo_binning_0', sample_bins=10, method="recursive_query")
    homo_binning_1 = HomoFeatureBinning(name='homo_binning_1')

    homo_onehot_0 = HomoOneHotEncoder(name='homo_onehot_0', need_alignment=True)
    homo_onehot_1 = HomoOneHotEncoder(name='homo_onehot_1')

    homo_lr_0 = HomoLR(name="homo_lr_0", penalty="L2", tol=0.0001, alpha=1.0,
                       optimizer="rmsprop", max_iter=5)
    homo_lr_1 = HomoLR(name="homo_lr_1")

    local_baseline_0 = LocalBaseline(name="local_baseline_0", model_name="LogisticRegression",
                                     model_opts={"penalty": "l2", "tol": 0.0001, "C": 1.0, "fit_intercept": True,
                                                 "solver": "lbfgs", "max_iter": 5, "multi_class": "ovr"})
    # The local baseline is enabled on both guest and host.
    local_baseline_0.get_party_instance(role='guest', party_id=guest).component_param(need_run=True)
    local_baseline_0.get_party_instance(role='host', party_id=host).component_param(need_run=True)
    local_baseline_1 = LocalBaseline(name="local_baseline_1")

    homo_secureboost_0 = HomoSecureBoost(name="homo_secureboost_0", num_trees=3)
    homo_secureboost_1 = HomoSecureBoost(name="homo_secureboost_1", num_trees=3)

    evaluation_0 = Evaluation(name="evaluation_0")
    evaluation_1 = Evaluation(name="evaluation_1")

    # Preprocessing: transform -> (sample) -> binning -> one-hot.
    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(data_transform_1, data=Data(data=reader_1.output.data),
                           model=Model(model=data_transform_0.output.model))

    # Only the *_0 branch is sampled.
    pipeline.add_component(federated_sample_0, data=Data(data=data_transform_0.output.data))

    pipeline.add_component(homo_binning_0, data=Data(data=federated_sample_0.output.data))
    pipeline.add_component(homo_binning_1, data=Data(data=data_transform_1.output.data),
                           model=Model(model=homo_binning_0.output.model))

    pipeline.add_component(homo_onehot_0, data=Data(data=homo_binning_0.output.data))
    pipeline.add_component(homo_onehot_1, data=Data(data=homo_binning_1.output.data),
                           model=Model(model=homo_onehot_0.output.model))

    # Three model families share the one-hot encoded data.
    pipeline.add_component(homo_lr_0, data=Data(data=homo_onehot_0.output.data))
    pipeline.add_component(homo_lr_1, data=Data(data=homo_onehot_1.output.data),
                           model=Model(model=homo_lr_0.output.model))

    pipeline.add_component(local_baseline_0, data=Data(data=homo_onehot_0.output.data))
    pipeline.add_component(local_baseline_1, data=Data(data=homo_onehot_1.output.data),
                           model=Model(model=local_baseline_0.output.model))

    pipeline.add_component(homo_secureboost_0, data=Data(data=homo_onehot_0.output.data))
    pipeline.add_component(homo_secureboost_1, data=Data(data=homo_onehot_1.output.data),
                           model=Model(model=homo_secureboost_0.output.model))

    # evaluation_0 covers LR and the local baseline; evaluation_1 covers SecureBoost.
    pipeline.add_component(evaluation_0,
                           data=Data(data=[homo_lr_0.output.data, homo_lr_1.output.data,
                                           local_baseline_0.output.data, local_baseline_1.output.data]))
    pipeline.add_component(evaluation_1,
                           data=Data(data=[homo_secureboost_0.output.data, homo_secureboost_1.output.data]))

    pipeline.compile()
    pipeline.fit()

    print(pipeline.get_component("evaluation_0").get_summary())
    print(pipeline.get_component("evaluation_1").get_summary())
def main(config="../../config.yaml", namespace=""):
    """Assemble, compile and fit the SBT-transformer / local-baseline pipeline.

    The wiring built here is: two readers -> two data transforms -> two
    intersections -> hetero secure boost (trained on the first intersection,
    validated on the second) -> SBT transformer -> two local baselines (one
    on the transformer output, one on the raw intersection output) -> one
    evaluation per baseline.

    Args:
        config: Either a path to a job config file (loaded with
            ``load_job_config``) or an already loaded config object that
            exposes ``parties.guest`` and ``parties.host``.
        namespace: Suffix appended to ``"experiment"`` to form the namespace
            of every input table.
    """
    # A string is treated as a path; any other object is used as given.
    if isinstance(config, str):
        config = load_job_config(config)
    party_cfg = config.parties
    guest = party_cfg.guest[0]
    host = party_cfg.host[0]

    def table_ref(table_name):
        # Returns a fresh dict per call so no two readers share one object.
        return {"name": table_name, "namespace": f"experiment{namespace}"}

    # Training and validation readers point at the same table names.
    guest_train_data = table_ref("breast_hetero_guest")
    host_train_data = table_ref("breast_hetero_host")
    guest_validate_data = table_ref("breast_hetero_guest")
    host_validate_data = table_ref("breast_hetero_host")

    # Pipeline skeleton: guest initiates, one guest and one host take part.
    pipeline = PipeLine().set_initiator(role="guest", party_id=guest)
    pipeline = pipeline.set_roles(guest=guest, host=host)

    # Readers: reader_0 feeds training, reader_1 feeds validation.
    reader_0 = Reader(name="reader_0")
    reader_1 = Reader(name="reader_1")
    reader_tables = (
        (reader_0, guest_train_data, host_train_data),
        (reader_1, guest_validate_data, host_validate_data),
    )
    for reader, guest_table, host_table in reader_tables:
        reader.get_party_instance(
            role="guest", party_id=guest
        ).component_param(table=guest_table)
        reader.get_party_instance(
            role="host", party_id=host
        ).component_param(table=host_table)

    # Data transforms: only the guest side is configured with labels.
    data_transform_0 = DataTransform(name="data_transform_0")
    data_transform_1 = DataTransform(name="data_transform_1")
    for transform in (data_transform_0, data_transform_1):
        transform.get_party_instance(
            role="guest", party_id=guest
        ).component_param(with_label=True, output_format="dense")
        transform.get_party_instance(
            role="host", party_id=host
        ).component_param(with_label=False)

    # Intersection components for the training and validation branches.
    intersect_0 = Intersection(name="intersection_0")
    intersect_1 = Intersection(name="intersection_1")

    # Secure boost component.
    hetero_secure_boost_0 = HeteroSecureBoost(
        name="hetero_secure_boost_0",
        num_trees=3,
        task_type="classification",
        objective_param={"objective": "cross_entropy"},
        encrypt_param={"method": "paillier"},
        tree_param={"max_depth": 3},
        validation_freqs=1,
    )

    # Evaluation components, one per local baseline.
    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")
    evaluation_1 = Evaluation(name="evaluation_1", eval_type="binary")

    # Transformer that consumes the secure boost model.
    transformer_0 = SBTTransformer(name='sbt_transformer_0', dense_format=True)

    # Two identically configured local baselines; each is switched on for
    # the guest and off for the host right after it is created.
    baselines = []
    for idx in range(2):
        baseline = LocalBaseline(
            name="local_baseline_{}".format(idx),
            model_name="LogisticRegression",
            model_opts={
                "penalty": "l2",
                "tol": 0.0001,
                "C": 1.0,
                "fit_intercept": True,
                "solver": "lbfgs",
                "max_iter": 50,
            },
        )
        baseline.get_party_instance(
            role='guest', party_id=guest
        ).component_param(need_run=True)
        baseline.get_party_instance(
            role='host', party_id=host
        ).component_param(need_run=False)
        baselines.append(baseline)
    local_baseline_0, local_baseline_1 = baselines

    # Only evaluation_1 is explicitly disabled on the host; evaluation_0 is
    # left with whatever the component's default is.
    evaluation_1.get_party_instance(
        role='host', party_id=host
    ).component_param(need_run=False)

    # Register components in dependency order.
    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(
        data_transform_0,
        data=Data(data=reader_0.output.data),
    )
    # The validation transform reuses the model of the training transform.
    pipeline.add_component(
        data_transform_1,
        data=Data(data=reader_1.output.data),
        model=Model(data_transform_0.output.model),
    )
    pipeline.add_component(
        intersect_0,
        data=Data(data=data_transform_0.output.data),
    )
    pipeline.add_component(
        intersect_1,
        data=Data(data=data_transform_1.output.data),
    )
    pipeline.add_component(
        hetero_secure_boost_0,
        data=Data(
            train_data=intersect_0.output.data,
            validate_data=intersect_1.output.data,
        ),
    )
    pipeline.add_component(
        transformer_0,
        data=Data(data=intersect_0.output.data),
        model=Model(isometric_model=hetero_secure_boost_0.output.model),
    )
    # local_baseline_0 is fed the transformer output, local_baseline_1 the
    # untransformed intersection output.
    pipeline.add_component(
        local_baseline_0,
        data=Data(data=transformer_0.output.data),
    )
    pipeline.add_component(
        local_baseline_1,
        data=Data(data=intersect_0.output.data),
    )
    pipeline.add_component(
        evaluation_0,
        data=Data(data=local_baseline_0.output.data),
    )
    pipeline.add_component(
        evaluation_1,
        data=Data(data=local_baseline_1.output.data),
    )

    pipeline.compile()
    pipeline.fit()