Example #1
0
def main(config="../../config.yaml", namespace=""):
    """Assemble and fit a hetero SecureBoost regression pipeline with cross validation.

    The pipeline is reader -> data transform -> intersection -> SecureBoost,
    where the SecureBoost component is configured with ``need_cv=True`` and
    5 splits.  After fitting, the SecureBoost summary is printed.

    Args:
        config: Either a path string, which is loaded through
            ``load_job_config``, or an already-loaded config object exposing
            ``parties.guest`` and ``parties.host``.
        namespace: Suffix appended to ``"experiment"`` to build the namespace
            of the input tables.
    """
    # Resolve the job config when only a path was supplied.
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest, host = parties.guest[0], parties.host[0]

    # Input tables of both parties share one namespace.
    table_namespace = f"experiment{namespace}"
    guest_train_data = {"name": "student_hetero_guest", "namespace": table_namespace}
    host_train_data = {"name": "student_hetero_host", "namespace": table_namespace}

    # The guest initiates the job; guest and host are the only roles.
    pipeline = PipeLine().set_initiator(role="guest", party_id=guest)
    pipeline = pipeline.set_roles(guest=guest, host=host)

    # Reader: every party reads its own table.
    reader_0 = Reader(name="reader_0")
    for role, party_id, table in (
        ("guest", guest, guest_train_data),
        ("host", host, host_train_data),
    ):
        reader_0.get_party_instance(role=role, party_id=party_id).component_param(table=table)

    # Data transform: only the guest side is configured with a (float) label.
    data_transform_0 = DataTransform(name="data_transform_0")
    guest_transform = data_transform_0.get_party_instance(role="guest", party_id=guest)
    guest_transform.component_param(with_label=True, output_format="dense", label_type="float")
    host_transform = data_transform_0.get_party_instance(role="host", party_id=host)
    host_transform.component_param(with_label=False)

    # Intersection of the two parties' data.
    intersect_0 = Intersection(name="intersection_0")

    # SecureBoost regression model, run in cross-validation mode.
    cv_settings = {
        "need_cv": True,
        "n_splits": 5,
        "shuffle": False,
        "random_seed": 103,
    }
    hetero_secure_boost_0 = HeteroSecureBoost(
        name="hetero_secure_boost_0",
        num_trees=3,
        task_type="regression",
        objective_param={"objective": "lse"},
        encrypt_param={"method": "Paillier"},
        tree_param={"max_depth": 3},
        validation_freqs=1,
        cv_param=cv_settings,
    )

    # Wire the components together; each one consumes the previous output.
    pipeline.add_component(reader_0)
    transform_input = Data(data=reader_0.output.data)
    pipeline.add_component(data_transform_0, data=transform_input)
    intersect_input = Data(data=data_transform_0.output.data)
    pipeline.add_component(intersect_0, data=intersect_input)
    boost_input = Data(train_data=intersect_0.output.data)
    pipeline.add_component(hetero_secure_boost_0, data=boost_input)

    pipeline.compile()
    pipeline.fit()

    print("fitting hetero secureboost done, result:")
    summary = pipeline.get_component("hetero_secure_boost_0").get_summary()
    print(summary)
Example #2
0
    role="host", party_id=10000).component_param(table=host_train_data)

# DataTransform preprocesses the raw data: extracting the label, converting the data format,
# filling missing values and so on. Refer to the algorithm list doc for more details.
# with_label=True is passed to the constructor; the host party instance below
# overrides it with with_label=False.
data_transform_0 = DataTransform(name="data_transform_0", with_label=True)
data_transform_0.get_party_instance(
    role="host", party_id=10000).component_param(with_label=False)

# Perform PSI for the hetero scenario.
intersect_0 = Intersection(name="intersection_0")

# Define a hetero-secureboost component. The parameters given here are set for all parties involved.
hetero_secureboost_0 = HeteroSecureBoost(
    name="hetero_secureboost_0",
    num_trees=5,
    bin_num=16,
    task_type="classification",
    objective_param={"objective": "cross_entropy"},
    encrypt_param={"method": "paillier"},
    tree_param={"max_depth": 3})

# An "Evaluation" component is needed to show the evaluation result.
evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")

# Add components to the pipeline in order of task execution.
# Components are connected by naming an upstream component's data output as their input.
# Typically, a feature engineering component takes its input as "data", while
# a modeling component uses "train_data". Note carefully the difference
# between the input of hetero_secureboost_0 and that of the other components below.
# This is only a simple example; for more details on other components, please check
# out the examples in "example/pipeline/{component you are interested in}".
pipeline.add_component(reader_0)\
Example #3
0
def main(config="../../config.yaml", namespace=""):
    """Fit a hetero SecureBoost classifier with ``boosting_strategy='mix'`` and evaluate it.

    Two reader -> data transform -> intersection chains are built: chain 0
    supplies ``train_data`` and chain 1 supplies ``validate_data`` to the
    SecureBoost component.  Both chains read the same
    ``vehicle_scale_hetero_*`` tables.  The model output is passed to an
    ``Evaluation`` component with ``eval_type="multi"`` and the SecureBoost
    summary is printed after fitting.

    Args:
        config: Either a path string, which is loaded through
            ``load_job_config``, or an already-loaded config object exposing
            ``parties.guest`` and ``parties.host``.
        namespace: Suffix appended to ``"experiment"`` to build the namespace
            of the input tables.
    """
    # Resolve the job config when only a path was supplied.
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest, host = parties.guest[0], parties.host[0]

    def table(name):
        # Table descriptor in the experiment namespace.
        return {"name": name, "namespace": f"experiment{namespace}"}

    # Training and validation descriptors point at the same tables.
    guest_train_data = table("vehicle_scale_hetero_guest")
    host_train_data = table("vehicle_scale_hetero_host")
    guest_validate_data = table("vehicle_scale_hetero_guest")
    host_validate_data = table("vehicle_scale_hetero_host")

    # The guest initiates the job; guest and host are the only roles.
    pipeline = PipeLine().set_initiator(role="guest", party_id=guest)
    pipeline = pipeline.set_roles(guest=guest, host=host)

    # Readers: reader_0 for training tables, reader_1 for validation tables.
    reader_0 = Reader(name="reader_0")
    reader_1 = Reader(name="reader_1")
    for reader, guest_table, host_table in (
        (reader_0, guest_train_data, host_train_data),
        (reader_1, guest_validate_data, host_validate_data),
    ):
        reader.get_party_instance(role="guest", party_id=guest).component_param(table=guest_table)
        reader.get_party_instance(role="host", party_id=host).component_param(table=host_table)

    # Data transforms: only the guest side is configured with a label.
    data_transform_0 = DataTransform(name="data_transform_0")
    data_transform_1 = DataTransform(name="data_transform_1")
    for transform in (data_transform_0, data_transform_1):
        guest_side = transform.get_party_instance(role="guest", party_id=guest)
        guest_side.component_param(with_label=True, output_format="dense")
        host_side = transform.get_party_instance(role="host", party_id=host)
        host_side.component_param(with_label=False)

    # One intersection per chain.
    intersect_0 = Intersection(name="intersection_0")
    intersect_1 = Intersection(name="intersection_1")

    # SecureBoost classifier using the 'mix' boosting strategy.
    boost_settings = {
        "num_trees": 3,
        "task_type": "classification",
        "objective_param": {"objective": "cross_entropy"},
        "encrypt_param": {"method": "Paillier"},
        "tree_param": {"max_depth": 3},
        "validation_freqs": 1,
        "boosting_strategy": "mix",
    }
    hetero_secure_boost_0 = HeteroSecureBoost(name="hetero_secure_boost_0", **boost_settings)

    # Evaluation component.
    evaluation_0 = Evaluation(name="evaluation_0", eval_type="multi")

    # Wire everything up; data_transform_1 reuses the model of data_transform_0.
    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(
        data_transform_1,
        data=Data(data=reader_1.output.data),
        model=Model(data_transform_0.output.model),
    )
    pipeline.add_component(intersect_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(intersect_1, data=Data(data=data_transform_1.output.data))
    boost_input = Data(
        train_data=intersect_0.output.data,
        validate_data=intersect_1.output.data,
    )
    pipeline.add_component(hetero_secure_boost_0, data=boost_input)
    pipeline.add_component(evaluation_0, data=Data(data=hetero_secure_boost_0.output.data))

    pipeline.compile()
    pipeline.fit()

    print("fitting hetero secureboost done, result:")
    summary = pipeline.get_component("hetero_secure_boost_0").get_summary()
    print(summary)
def main(config="../../config.yaml", namespace=""):
    """Train hetero SecureBoost, then fit two local LogisticRegression baselines.

    ``local_baseline_0`` consumes the output of an ``SBTTransformer`` that is
    given the SecureBoost model as its isometric model, while
    ``local_baseline_1`` consumes the intersected training data directly.
    Each baseline feeds its own binary ``Evaluation`` component.  Both
    baselines run on the guest only (``need_run=False`` for the host).

    Args:
        config: Either a path string, which is loaded through
            ``load_job_config``, or an already-loaded config object exposing
            ``parties.guest`` and ``parties.host``.
        namespace: Suffix appended to ``"experiment"`` to build the namespace
            of the input tables.
    """
    # Resolve the job config when only a path was supplied.
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest, host = parties.guest[0], parties.host[0]

    def table(name):
        # Table descriptor in the experiment namespace.
        return {"name": name, "namespace": f"experiment{namespace}"}

    # Training and validation descriptors point at the same tables.
    guest_train_data = table("breast_hetero_guest")
    host_train_data = table("breast_hetero_host")
    guest_validate_data = table("breast_hetero_guest")
    host_validate_data = table("breast_hetero_host")

    # The guest initiates the job; guest and host are the only roles.
    pipeline = PipeLine().set_initiator(role="guest", party_id=guest)
    pipeline = pipeline.set_roles(guest=guest, host=host)

    # Readers: reader_0 for training tables, reader_1 for validation tables.
    reader_0 = Reader(name="reader_0")
    reader_1 = Reader(name="reader_1")
    for reader, guest_table, host_table in (
        (reader_0, guest_train_data, host_train_data),
        (reader_1, guest_validate_data, host_validate_data),
    ):
        reader.get_party_instance(role="guest", party_id=guest).component_param(table=guest_table)
        reader.get_party_instance(role="host", party_id=host).component_param(table=host_table)

    # Data transforms: only the guest side is configured with a label.
    data_transform_0 = DataTransform(name="data_transform_0")
    data_transform_1 = DataTransform(name="data_transform_1")
    for transform in (data_transform_0, data_transform_1):
        guest_side = transform.get_party_instance(role="guest", party_id=guest)
        guest_side.component_param(with_label=True, output_format="dense")
        host_side = transform.get_party_instance(role="host", party_id=host)
        host_side.component_param(with_label=False)

    # One intersection per chain.
    intersect_0 = Intersection(name="intersection_0")
    intersect_1 = Intersection(name="intersection_1")

    # SecureBoost classifier.
    hetero_secure_boost_0 = HeteroSecureBoost(
        name="hetero_secure_boost_0",
        num_trees=3,
        task_type="classification",
        objective_param={"objective": "cross_entropy"},
        encrypt_param={"method": "paillier"},
        tree_param={"max_depth": 3},
        validation_freqs=1,
    )

    # One evaluation per local baseline.
    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")
    evaluation_1 = Evaluation(name="evaluation_1", eval_type="binary")

    # Transformer that is later given the SecureBoost model.
    transformer_0 = SBTTransformer(name="sbt_transformer_0", dense_format=True)

    def new_guest_only_baseline(idx):
        # Build a LogisticRegression baseline and switch it off on the host.
        baseline = LocalBaseline(
            name=f"local_baseline_{idx}",
            model_name="LogisticRegression",
            model_opts={
                "penalty": "l2",
                "tol": 0.0001,
                "C": 1.0,
                "fit_intercept": True,
                "solver": "lbfgs",
                "max_iter": 50,
            },
        )
        baseline.get_party_instance(role="guest", party_id=guest).component_param(need_run=True)
        baseline.get_party_instance(role="host", party_id=host).component_param(need_run=False)
        return baseline

    local_baseline_0 = new_guest_only_baseline(0)
    local_baseline_1 = new_guest_only_baseline(1)

    # evaluation_1 is likewise disabled on the host.
    evaluation_1.get_party_instance(role="host", party_id=host).component_param(need_run=False)

    # Wire everything up; data_transform_1 reuses the model of data_transform_0.
    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(
        data_transform_1,
        data=Data(data=reader_1.output.data),
        model=Model(data_transform_0.output.model),
    )
    pipeline.add_component(intersect_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(intersect_1, data=Data(data=data_transform_1.output.data))
    boost_input = Data(
        train_data=intersect_0.output.data,
        validate_data=intersect_1.output.data,
    )
    pipeline.add_component(hetero_secure_boost_0, data=boost_input)
    pipeline.add_component(
        transformer_0,
        data=Data(data=intersect_0.output.data),
        model=Model(isometric_model=hetero_secure_boost_0.output.model),
    )

    # Baseline 0 sees the transformer output, baseline 1 the intersected data.
    pipeline.add_component(local_baseline_0, data=Data(data=transformer_0.output.data))
    pipeline.add_component(local_baseline_1, data=Data(data=intersect_0.output.data))

    pipeline.add_component(evaluation_0, data=Data(data=local_baseline_0.output.data))
    pipeline.add_component(evaluation_1, data=Data(data=local_baseline_1.output.data))

    pipeline.compile()
    pipeline.fit()
Example #5
0
def make_normal_dsl(config,
                    namespace,
                    selection_param,
                    is_multi_host=False,
                    host_dense_output=True,
                    **kwargs):
    """Build and compile a hetero feature-selection pipeline.

    The base pipeline is reader -> data transform -> intersection.  Optional
    components are added when their parameter dict is present in ``kwargs``,
    and each added component's model is passed to the final
    ``HeteroFeatureSelection`` component as an isometric model.

    Args:
        config: Loaded job config exposing ``parties.guest`` and
            ``parties.host``.
        namespace: Suffix appended to ``"experiment"`` to build the namespace
            of the input tables.
        selection_param: Keyword arguments for ``HeteroFeatureSelection``.
        is_multi_host: When true, the whole ``parties.host`` list is used as
            the host party id; otherwise only its first entry.
        host_dense_output: Not used by this function.
        **kwargs: Optional parameter dicts.  Recognised keys are
            ``binning_param``, ``statistic_param``, ``psi_param``,
            ``sbt_param`` and ``fast_sbt_param``.

    Returns:
        The compiled (not yet fitted) pipeline.
    """
    parties = config.parties
    guest = parties.guest[0]
    hosts = parties.host if is_multi_host else parties.host[0]

    def table(name):
        # Table descriptor in the experiment namespace.
        return {"name": name, "namespace": f"experiment{namespace}"}

    guest_train_data = table("breast_hetero_guest")
    host_train_data = table("breast_hetero_host")
    guest_eval_data = table("breast_hetero_guest")
    host_eval_data = table("breast_hetero_host")

    # Pipeline skeleton: the guest initiates, guest and host(s) take part.
    pipeline = PipeLine()
    pipeline.set_initiator(role="guest", party_id=guest)
    pipeline.set_roles(guest=guest, host=hosts)

    # Reader for the training tables, configured per party.
    reader_0 = Reader(name="reader_0")
    for role, party_id, source in (
        ("guest", guest, guest_train_data),
        ("host", hosts, host_train_data),
    ):
        reader_0.get_party_instance(role=role, party_id=party_id).component_param(table=source)

    # Data transform: the guest has a label and dense output, the host has no label.
    data_transform_0 = DataTransform(name="data_transform_0")
    guest_transform = data_transform_0.get_party_instance(role="guest", party_id=guest)
    guest_transform.component_param(with_label=True, output_format="dense")
    host_transform = data_transform_0.get_party_instance(role="host", party_id=hosts)
    host_transform.component_param(with_label=False)

    # Intersection closes the base chain.
    intersection_0 = Intersection(name="intersection_0")
    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))

    # Components whose models are handed to feature selection.
    isometric_components = []

    def attach(component, **inputs):
        # Add the component to the pipeline and remember it as a model source.
        pipeline.add_component(component, data=Data(**inputs))
        isometric_components.append(component)

    if "binning_param" in kwargs:
        attach(HeteroFeatureBinning(**kwargs["binning_param"]),
               data=intersection_0.output.data)

    if "statistic_param" in kwargs:
        attach(DataStatistics(**kwargs["statistic_param"]),
               data=intersection_0.output.data)

    if "psi_param" in kwargs:
        # PSI needs a second chain (reader_1 -> data_transform_1 -> intersection_1)
        # over the eval tables; data_transform_1 reuses data_transform_0's model.
        reader_1 = Reader(name="reader_1")
        for role, party_id, source in (
            ("guest", guest, guest_eval_data),
            ("host", hosts, host_eval_data),
        ):
            reader_1.get_party_instance(role=role, party_id=party_id).component_param(table=source)
        data_transform_1 = DataTransform(name="data_transform_1")
        intersection_1 = Intersection(name="intersection_1")
        pipeline.add_component(reader_1)
        pipeline.add_component(
            data_transform_1,
            data=Data(data=reader_1.output.data),
            model=Model(data_transform_0.output.model),
        )
        pipeline.add_component(intersection_1, data=Data(data=data_transform_1.output.data))

        attach(PSI(**kwargs["psi_param"]),
               train_data=intersection_0.output.data,
               validate_data=intersection_1.output.data)

    if "sbt_param" in kwargs:
        attach(HeteroSecureBoost(**kwargs["sbt_param"]),
               train_data=intersection_0.output.data)

    if "fast_sbt_param" in kwargs:
        attach(HeteroFastSecureBoost(**kwargs["fast_sbt_param"]),
               train_data=intersection_0.output.data)

    # Feature selection runs on the intersected data plus all collected models.
    hetero_feature_selection_0 = HeteroFeatureSelection(**selection_param)
    collected_models = [component.output.model for component in isometric_components]
    pipeline.add_component(
        hetero_feature_selection_0,
        data=Data(data=intersection_0.output.data),
        model=Model(isometric_model=collected_models),
    )

    # Compile once every component has been added.
    pipeline.compile()
    return pipeline
Example #6
0
def main(config="../../config.yaml", namespace=""):
    """Fit a preprocessing chain followed by HeteroLR and SecureBoost models.

    The preprocessing chain is reader -> DataIO -> intersection -> federated
    sample -> feature scale -> feature binning -> feature selection ->
    one-hot encoder.  Two HeteroLR and two HeteroSecureBoost components (one
    of each in cross-validation mode) train on the one-hot output, and the
    two HeteroLR outputs are evaluated.  The summary of ``evaluation_0`` is
    printed after fitting.

    Args:
        config: Either a path string, which is loaded through
            ``load_job_config``, or an already-loaded config object exposing
            ``parties`` (guest, host, arbiter), ``backend`` and ``work_mode``.
        namespace: Suffix appended to ``"experiment"`` to build the namespace
            of the input tables.
    """
    # Resolve the job config when only a path was supplied.
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest, host, arbiter = parties.guest[0], parties.host[0], parties.arbiter[0]
    backend, work_mode = config.backend, config.work_mode

    table_namespace = f"experiment{namespace}"
    guest_train_data = {"name": "breast_hetero_guest", "namespace": table_namespace}
    host_train_data = {"name": "breast_hetero_host", "namespace": table_namespace}

    # The guest initiates the job; guest, host and arbiter take part.
    pipeline = PipeLine().set_initiator(role="guest", party_id=guest)
    pipeline = pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # Reader: every party reads its own table.
    reader_0 = Reader(name="reader_0")
    for role, party_id, source in (
        ("guest", guest, guest_train_data),
        ("host", host, host_train_data),
    ):
        reader_0.get_party_instance(role=role, party_id=party_id).component_param(table=source)

    # DataIO: the parties differ only in whether a label is present.
    dataio_0 = DataIO(name="dataio_0")
    for role, party_id, has_label in (("guest", guest, True), ("host", host, False)):
        dataio_0.get_party_instance(role=role, party_id=party_id).component_param(
            with_label=has_label, missing_fill=True, outlier_replace=True)

    # Preprocessing components.
    intersection_0 = Intersection(name="intersection_0")
    federated_sample_0 = FederatedSample(
        name="federated_sample_0",
        mode="stratified",
        method="upsample",
        fractions=[[0, 1.5], [1, 2.0]],
    )
    feature_scale_0 = FeatureScale(name="feature_scale_0")
    hetero_feature_binning_0 = HeteroFeatureBinning(name="hetero_feature_binning_0")
    hetero_feature_selection_0 = HeteroFeatureSelection(name="hetero_feature_selection_0")
    one_hot_0 = OneHotEncoder(name="one_hot_0")

    def lr_settings():
        # Fresh copy of the settings shared by both HeteroLR components.
        return {
            "penalty": "L2",
            "optimizer": "rmsprop",
            "tol": 1e-5,
            "init_param": {"init_method": "random_uniform"},
            "alpha": 0.01,
            "max_iter": 10,
            "early_stop": "diff",
            "batch_size": 320,
            "learning_rate": 0.15,
        }

    # hetero_lr_1 is hetero_lr_0 plus cross validation.
    hetero_lr_0 = HeteroLR(name="hetero_lr_0", **lr_settings())
    hetero_lr_1 = HeteroLR(
        name="hetero_lr_1",
        **lr_settings(),
        cv_param={
            "n_splits": 5,
            "shuffle": True,
            "random_seed": 103,
            "need_cv": True,
        },
    )

    # hetero_secureboost_0 runs in cross-validation mode, hetero_secureboost_1 does not.
    hetero_secureboost_0 = HeteroSecureBoost(
        name="hetero_secureboost_0",
        num_trees=5,
        cv_param={"shuffle": False, "need_cv": True},
    )
    hetero_secureboost_1 = HeteroSecureBoost(name="hetero_secureboost_1", num_trees=5)
    evaluation_0 = Evaluation(name="evaluation_0")
    evaluation_1 = Evaluation(name="evaluation_1")

    # Linear preprocessing chain: each stage reads the previous stage's data.
    pipeline.add_component(reader_0)
    upstream = reader_0
    for stage in (
        dataio_0,
        intersection_0,
        federated_sample_0,
        feature_scale_0,
        hetero_feature_binning_0,
        hetero_feature_selection_0,
        one_hot_0,
    ):
        pipeline.add_component(stage, data=Data(data=upstream.output.data))
        upstream = stage

    # All four models train on the one-hot encoded data.
    for model in (hetero_lr_0, hetero_lr_1, hetero_secureboost_0, hetero_secureboost_1):
        pipeline.add_component(model, data=Data(train_data=one_hot_0.output.data))

    # Only the two HeteroLR outputs are evaluated.
    for evaluation, model in ((evaluation_0, hetero_lr_0), (evaluation_1, hetero_lr_1)):
        pipeline.add_component(evaluation, data=Data(data=model.output.data))
    pipeline.compile()

    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)

    summary = pipeline.get_component("evaluation_0").get_summary()
    print(summary)
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    guest_train_data = {
        "name": "breast_hetero_guest",
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": "breast_hetero_host",
        "namespace": f"experiment{namespace}"
    }

    pipeline = PipeLine().set_initiator(
        role='guest', party_id=guest).set_roles(guest=guest,
                                                host=host,
                                                arbiter=arbiter)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)

    reader_1 = Reader(name="reader_1")
    reader_1.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_1.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)

    reader_2 = Reader(name="reader_2")
    reader_2.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_2.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)

    data_transform_0 = DataTransform(name="data_transform_0")
    data_transform_0.get_party_instance(
        role='guest', party_id=guest).component_param(with_label=True,
                                                      missing_fill=True,
                                                      outlier_replace=True)
    data_transform_0.get_party_instance(
        role='host', party_id=host).component_param(with_label=False,
                                                    missing_fill=True,
                                                    outlier_replace=True)
    data_transform_1 = DataTransform(name="data_transform_1")
    data_transform_2 = DataTransform(name="data_transform_2")

    intersection_0 = Intersection(name="intersection_0")
    intersection_1 = Intersection(name="intersection_1")
    intersection_2 = Intersection(name="intersection_2")

    union_0 = Union(name="union_0")

    federated_sample_0 = FederatedSample(name="federated_sample_0",
                                         mode="stratified",
                                         method="downsample",
                                         fractions=[[0, 1.0], [1, 1.0]])

    feature_scale_0 = FeatureScale(name="feature_scale_0")
    feature_scale_1 = FeatureScale(name="feature_scale_1")

    hetero_feature_binning_0 = HeteroFeatureBinning(
        name="hetero_feature_binning_0")
    hetero_feature_binning_1 = HeteroFeatureBinning(
        name="hetero_feature_binning_1")

    hetero_feature_selection_0 = HeteroFeatureSelection(
        name="hetero_feature_selection_0")
    hetero_feature_selection_1 = HeteroFeatureSelection(
        name="hetero_feature_selection_1")

    one_hot_0 = OneHotEncoder(name="one_hot_0")
    one_hot_1 = OneHotEncoder(name="one_hot_1")

    hetero_lr_0 = HeteroLR(name="hetero_lr_0",
                           penalty="L2",
                           optimizer="rmsprop",
                           tol=1e-5,
                           init_param={"init_method": "random_uniform"},
                           alpha=0.01,
                           max_iter=3,
                           early_stop="diff",
                           batch_size=320,
                           learning_rate=0.15)
    hetero_lr_1 = HeteroLR(name="hetero_lr_1")
    hetero_lr_2 = HeteroLR(name="hetero_lr_2",
                           penalty="L2",
                           optimizer="rmsprop",
                           tol=1e-5,
                           init_param={"init_method": "random_uniform"},
                           alpha=0.01,
                           max_iter=3,
                           early_stop="diff",
                           batch_size=320,
                           learning_rate=0.15,
                           cv_param={
                               "n_splits": 5,
                               "shuffle": True,
                               "random_seed": 103,
                               "need_cv": True
                           })

    hetero_sshe_lr_0 = HeteroSSHELR(
        name="hetero_sshe_lr_0",
        reveal_every_iter=True,
        reveal_strategy="respectively",
        penalty="L2",
        optimizer="rmsprop",
        tol=1e-5,
        batch_size=320,
        learning_rate=0.15,
        init_param={"init_method": "random_uniform"},
        alpha=0.01,
        max_iter=3)
    hetero_sshe_lr_1 = HeteroSSHELR(name="hetero_sshe_lr_1")

    local_baseline_0 = LocalBaseline(name="local_baseline_0",
                                     model_name="LogisticRegression",
                                     model_opts={
                                         "penalty": "l2",
                                         "tol": 0.0001,
                                         "C": 1.0,
                                         "fit_intercept": True,
                                         "solver": "lbfgs",
                                         "max_iter": 5,
                                         "multi_class": "ovr"
                                     })
    local_baseline_0.get_party_instance(
        role='guest', party_id=guest).component_param(need_run=True)
    local_baseline_0.get_party_instance(
        role='host', party_id=host).component_param(need_run=False)
    local_baseline_1 = LocalBaseline(name="local_baseline_1")

    hetero_secureboost_0 = HeteroSecureBoost(name="hetero_secureboost_0",
                                             num_trees=3)
    hetero_secureboost_1 = HeteroSecureBoost(name="hetero_secureboost_1")
    hetero_secureboost_2 = HeteroSecureBoost(name="hetero_secureboost_2",
                                             num_trees=3,
                                             cv_param={
                                                 "shuffle": False,
                                                 "need_cv": True
                                             })

    hetero_linr_0 = HeteroLinR(name="hetero_linr_0",
                               penalty="L2",
                               optimizer="sgd",
                               tol=0.001,
                               alpha=0.01,
                               max_iter=3,
                               early_stop="weight_diff",
                               batch_size=-1,
                               learning_rate=0.15,
                               decay=0.0,
                               decay_sqrt=False,
                               init_param={"init_method": "zeros"},
                               floating_point_precision=23)
    hetero_linr_1 = HeteroLinR(name="hetero_linr_1")

    hetero_sshe_linr_0 = HeteroSSHELinR(name="hetero_sshe_linr_0",
                                        max_iter=5,
                                        early_stop="weight_diff",
                                        batch_size=-1)
    hetero_sshe_linr_1 = HeteroSSHELinR(name="hetero_sshe_linr_1")

    hetero_poisson_0 = HeteroPoisson(name="hetero_poisson_0",
                                     early_stop="weight_diff",
                                     max_iter=10,
                                     alpha=100.0,
                                     batch_size=-1,
                                     learning_rate=0.01,
                                     optimizer="rmsprop",
                                     exposure_colname="exposure",
                                     decay_sqrt=False,
                                     tol=0.001,
                                     init_param={"init_method": "zeros"},
                                     penalty="L2")
    hetero_poisson_1 = HeteroPoisson(name="hetero_poisson_1")

    hetero_sshe_poisson_0 = HeteroSSHEPoisson(name="hetero_sshe_poisson_0",
                                              max_iter=5)
    hetero_sshe_poisson_1 = HeteroSSHEPoisson(name="hetero_sshe_poisson_1")

    evaluation_0 = Evaluation(name="evaluation_0")
    evaluation_1 = Evaluation(name="evaluation_1")
    evaluation_2 = Evaluation(name="evaluation_2")

    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(reader_2)

    pipeline.add_component(data_transform_0,
                           data=Data(data=reader_0.output.data))
    pipeline.add_component(data_transform_1,
                           data=Data(data=reader_1.output.data),
                           model=Model(model=data_transform_0.output.model))
    pipeline.add_component(data_transform_2,
                           data=Data(data=reader_2.output.data),
                           model=Model(model=data_transform_0.output.model))

    pipeline.add_component(intersection_0,
                           data=Data(data=data_transform_0.output.data))
    pipeline.add_component(intersection_1,
                           data=Data(data=data_transform_1.output.data))
    pipeline.add_component(intersection_2,
                           data=Data(data=data_transform_2.output.data))

    pipeline.add_component(
        union_0,
        data=Data(
            data=[intersection_0.output.data, intersection_2.output.data]))

    pipeline.add_component(federated_sample_0,
                           data=Data(data=intersection_1.output.data))

    pipeline.add_component(feature_scale_0,
                           data=Data(data=union_0.output.data))
    pipeline.add_component(feature_scale_1,
                           data=Data(data=federated_sample_0.output.data),
                           model=Model(model=feature_scale_0.output.model))

    pipeline.add_component(hetero_feature_binning_0,
                           data=Data(data=feature_scale_0.output.data))
    pipeline.add_component(
        hetero_feature_binning_1,
        data=Data(data=feature_scale_1.output.data),
        model=Model(model=hetero_feature_binning_0.output.model))

    pipeline.add_component(
        hetero_feature_selection_0,
        data=Data(data=hetero_feature_binning_0.output.data))
    pipeline.add_component(
        hetero_feature_selection_1,
        data=Data(data=hetero_feature_binning_1.output.data),
        model=Model(model=hetero_feature_selection_0.output.model))

    pipeline.add_component(
        one_hot_0, data=Data(data=hetero_feature_selection_0.output.data))
    pipeline.add_component(
        one_hot_1,
        data=Data(data=hetero_feature_selection_1.output.data),
        model=Model(model=one_hot_0.output.model))

    pipeline.add_component(hetero_lr_0,
                           data=Data(train_data=one_hot_0.output.data))
    pipeline.add_component(hetero_lr_1,
                           data=Data(test_data=one_hot_1.output.data),
                           model=Model(model=hetero_lr_0.output.model))
    pipeline.add_component(hetero_lr_2,
                           data=Data(train_data=one_hot_0.output.data))

    pipeline.add_component(local_baseline_0,
                           data=Data(train_data=one_hot_0.output.data))
    pipeline.add_component(local_baseline_1,
                           data=Data(test_data=one_hot_1.output.data),
                           model=Model(model=local_baseline_0.output.model))

    pipeline.add_component(hetero_sshe_lr_0,
                           data=Data(train_data=one_hot_0.output.data))
    pipeline.add_component(hetero_sshe_lr_1,
                           data=Data(test_data=one_hot_1.output.data),
                           model=Model(model=hetero_sshe_lr_0.output.model))

    pipeline.add_component(hetero_secureboost_0,
                           data=Data(train_data=one_hot_0.output.data))
    pipeline.add_component(
        hetero_secureboost_1,
        data=Data(test_data=one_hot_1.output.data),
        model=Model(model=hetero_secureboost_0.output.model))
    pipeline.add_component(hetero_secureboost_2,
                           data=Data(train_data=one_hot_0.output.data))

    pipeline.add_component(hetero_linr_0,
                           data=Data(train_data=one_hot_0.output.data))
    pipeline.add_component(hetero_linr_1,
                           data=Data(test_data=one_hot_1.output.data),
                           model=Model(model=hetero_linr_0.output.model))

    pipeline.add_component(hetero_sshe_linr_0,
                           data=Data(train_data=one_hot_0.output.data))
    pipeline.add_component(hetero_sshe_linr_1,
                           data=Data(test_data=one_hot_1.output.data),
                           model=Model(model=hetero_sshe_linr_0.output.model))

    pipeline.add_component(hetero_poisson_0,
                           data=Data(train_data=one_hot_0.output.data))
    pipeline.add_component(hetero_poisson_1,
                           data=Data(test_data=one_hot_1.output.data),
                           model=Model(model=hetero_poisson_0.output.model))

    pipeline.add_component(
        evaluation_0,
        data=Data(data=[
            hetero_lr_0.output.data, hetero_lr_1.output.data,
            hetero_sshe_lr_0.output.data, hetero_sshe_lr_1.output.data,
            local_baseline_0.output.data, local_baseline_1.output.data
        ]))

    pipeline.add_component(hetero_sshe_poisson_0,
                           data=Data(train_data=one_hot_0.output.data))
    pipeline.add_component(
        hetero_sshe_poisson_1,
        data=Data(test_data=one_hot_1.output.data),
        model=Model(model=hetero_sshe_poisson_0.output.model))

    pipeline.add_component(
        evaluation_1,
        data=Data(data=[
            hetero_linr_0.output.data, hetero_linr_1.output.data,
            hetero_sshe_linr_0.output.data, hetero_linr_1.output.data
        ]))
    pipeline.add_component(
        evaluation_2,
        data=Data(data=[
            hetero_poisson_0.output.data, hetero_poisson_1.output.data,
            hetero_sshe_poisson_0.output.data,
            hetero_sshe_poisson_1.output.data
        ]))

    pipeline.compile()

    pipeline.fit()

    print(pipeline.get_component("evaluation_0").get_summary())
    print(pipeline.get_component("evaluation_1").get_summary())
    print(pipeline.get_component("evaluation_2").get_summary())
Example #8
0
def main(config="../../config.yaml", namespace=""):
    """Fit a pipeline with two hetero SecureBoost components and return it.

    The pipeline reads the breast hetero guest/host tables, runs
    DataTransform and Intersection, then trains ``hetero_secure_boost_0``.
    ``hetero_secure_boost_1`` receives the same training data plus the model
    output of ``hetero_secure_boost_0``, and its output is evaluated as a
    binary task.

    Args:
        config: Either a path string, which is loaded with
            ``load_job_config``, or an already-loaded job config exposing
            ``parties``.
        namespace: Suffix appended to ``"experiment"`` to form the table
            namespace.

    Returns:
        The fitted ``PipeLine`` object.
    """
    job_config = load_job_config(config) if isinstance(config, str) else config
    parties = job_config.parties
    guest, host, arbiter = (parties.guest[0], parties.host[0],
                            parties.arbiter[0])

    table_namespace = f"experiment{namespace}"
    guest_table = {"name": "breast_hetero_guest",
                   "namespace": table_namespace}
    host_table = {"name": "breast_hetero_host",
                  "namespace": table_namespace}

    # Guest initiates the job; guest, host and arbiter all participate.
    pipeline = PipeLine().set_initiator(
        role='guest', party_id=guest).set_roles(
            guest=guest, host=host, arbiter=arbiter)

    # Reader: each party points at its own table.
    reader_0 = Reader(name="reader_0")
    guest_reader = reader_0.get_party_instance(role='guest', party_id=guest)
    guest_reader.component_param(table=guest_table)
    host_reader = reader_0.get_party_instance(role='host', party_id=host)
    host_reader.component_param(table=host_table)

    # DataTransform: with_label is True for guest and False for host.
    data_transform_0 = DataTransform(name="data_transform_0",
                                     output_format='dense')
    data_transform_0.get_party_instance(
        role='guest', party_id=guest).component_param(with_label=True)
    host_transform = data_transform_0.get_party_instance(role='host',
                                                         party_id=host)
    host_transform.component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")

    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0,
                           data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0,
                           data=Data(data=data_transform_0.output.data))

    def build_secure_boost(component_name):
        # Both boosters share identical settings; fresh dicts are built on
        # every call so the two components never share parameter objects.
        return HeteroSecureBoost(
            name=component_name,
            num_trees=3,
            task_type="classification",
            objective_param={"objective": "cross_entropy"},
            encrypt_param={"method": "Paillier"},
            tree_param={"max_depth": 3},
            validation_freqs=1)

    hetero_secure_boost_0 = build_secure_boost("hetero_secure_boost_0")
    hetero_secure_boost_1 = build_secure_boost("hetero_secure_boost_1")

    training_data = intersection_0.output.data
    pipeline.add_component(hetero_secure_boost_0,
                           data=Data(train_data=training_data))
    # The second booster trains on the same data and is additionally given
    # the first booster's model.
    pipeline.add_component(
        hetero_secure_boost_1,
        data=Data(train_data=training_data),
        model=Model(model=hetero_secure_boost_0.output.model))

    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")
    pipeline.add_component(evaluation_0,
                           data=Data(data=hetero_secure_boost_1.output.data))

    pipeline.compile()
    pipeline.fit()

    # Print the summary of each component of interest, in training order.
    for component_name in ("hetero_secure_boost_0",
                           "hetero_secure_boost_1",
                           "evaluation_0"):
        prettify(pipeline.get_component(component_name).get_summary())

    return pipeline
def main(config="../../config.yaml", namespace=""):
    """Train hetero SecureBoost with validation data, then run prediction.

    A training pipeline (two readers, two DataTransforms, two Intersections,
    one HeteroSecureBoost and one Evaluation) is compiled and fitted. Selected
    components are then deployed into a separate predict pipeline that is fed
    by ``reader_0``, and the first ten rows of the prediction output are
    printed.

    Args:
        config: Either a path string, which is loaded with
            ``load_job_config``, or an already-loaded job config exposing
            ``parties``.
        namespace: Suffix appended to ``"experiment"`` to form the table
            namespace.
    """
    # obtain config
    job_config = load_job_config(config) if isinstance(config, str) else config
    parties = job_config.parties
    guest, host = parties.guest[0], parties.host[0]

    # data sets: the validation tables name the same tables as training
    table_namespace = f"experiment{namespace}"
    guest_train_data = {"name": "breast_hetero_guest",
                        "namespace": table_namespace}
    host_train_data = {"name": "breast_hetero_host",
                       "namespace": table_namespace}
    guest_validate_data = {"name": "breast_hetero_guest",
                           "namespace": table_namespace}
    host_validate_data = {"name": "breast_hetero_host",
                          "namespace": table_namespace}

    # init pipeline
    pipeline = PipeLine()
    pipeline.set_initiator(role="guest", party_id=guest)
    pipeline.set_roles(guest=guest, host=host)

    # readers: reader_0 serves training tables, reader_1 validation tables
    reader_0 = Reader(name="reader_0")
    reader_1 = Reader(name="reader_1")
    reader_tables = (
        (reader_0, guest_train_data, host_train_data),
        (reader_1, guest_validate_data, host_validate_data),
    )
    for reader, guest_table, host_table in reader_tables:
        guest_reader = reader.get_party_instance(role="guest",
                                                 party_id=guest)
        guest_reader.component_param(table=guest_table)
        host_reader = reader.get_party_instance(role="host", party_id=host)
        host_reader.component_param(table=host_table)

    # data transforms: guest is configured with labels, host without
    data_transform_0 = DataTransform(name="data_transform_0")
    data_transform_1 = DataTransform(name="data_transform_1")
    for transform in (data_transform_0, data_transform_1):
        guest_transform = transform.get_party_instance(role="guest",
                                                       party_id=guest)
        guest_transform.component_param(with_label=True,
                                        output_format="dense")
        host_transform = transform.get_party_instance(role="host",
                                                      party_id=host)
        host_transform.component_param(with_label=False)

    # data intersect components
    intersect_0 = Intersection(name="intersection_0")
    intersect_1 = Intersection(name="intersection_1")

    # secure boost component
    hetero_secure_boost_0 = HeteroSecureBoost(
        name="hetero_secure_boost_0",
        num_trees=3,
        task_type="classification",
        objective_param={"objective": "cross_entropy"},
        encrypt_param={"method": "Paillier"},
        tree_param={"max_depth": 3},
        validation_freqs=1)

    # evaluation component
    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")

    # wire the training graph
    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(data_transform_0,
                           data=Data(data=reader_0.output.data))
    # data_transform_1 is given data_transform_0's model output
    pipeline.add_component(data_transform_1,
                           data=Data(data=reader_1.output.data),
                           model=Model(data_transform_0.output.model))
    pipeline.add_component(intersect_0,
                           data=Data(data=data_transform_0.output.data))
    pipeline.add_component(intersect_1,
                           data=Data(data=data_transform_1.output.data))
    boost_inputs = Data(train_data=intersect_0.output.data,
                        validate_data=intersect_1.output.data)
    pipeline.add_component(hetero_secure_boost_0, data=boost_inputs)
    pipeline.add_component(evaluation_0,
                           data=Data(data=hetero_secure_boost_0.output.data))

    pipeline.compile()
    pipeline.fit()

    print("fitting hetero secureboost done, result:")
    print(pipeline.get_component("hetero_secure_boost_0").get_summary())

    print('start to predict')

    # predict: deploy the components needed by the predict pipeline
    deployed_components = [
        data_transform_0, intersect_0, hetero_secure_boost_0, evaluation_0
    ]
    pipeline.deploy_component(deployed_components)

    predict_pipeline = PipeLine()
    # add data reader onto predict pipeline
    predict_pipeline.add_component(reader_0)
    # add the deployed training pipeline, mapping reader_0's output onto the
    # input of its data_transform_0
    predict_input = {
        pipeline.data_transform_0.input.data: reader_0.output.data
    }
    predict_pipeline.add_component(pipeline,
                                   data=Data(predict_input=predict_input))

    # run predict model
    predict_pipeline.predict()
    boost_component = predict_pipeline.get_component("hetero_secure_boost_0")
    predict_result = boost_component.get_output_data()
    print("Showing 10 data of predict result")
    for row in predict_result["data"][:10]:
        print(row)