Example #1
# imports assumed from the FATE pipeline package; exact module paths may vary by FATE version
from pipeline.backend.pipeline import PipeLine
from pipeline.component import Reader, DataIO, Intersection, SampleWeight, HeteroLR, Evaluation
from pipeline.interface import Data
from pipeline.runtime.entity import JobParameters
from pipeline.utils.tools import load_job_config

def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    hosts = parties.host
    arbiter = parties.arbiter[0]
    backend = config.backend
    work_mode = config.work_mode

    guest_train_data = {"name": "breast_hetero_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "breast_hetero_host", "namespace": f"experiment{namespace}"}

    pipeline = PipeLine().set_initiator(role='guest', party_id=guest).set_roles(guest=guest, host=hosts, arbiter=arbiter)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=hosts).component_param(table=host_train_data)

    dataio_0 = DataIO(name="dataio_0")
    dataio_0.get_party_instance(role='guest', party_id=guest).component_param(with_label=True, label_name="y",
                                                                             label_type="int", output_format="dense")
    dataio_0.get_party_instance(role='host', party_id=hosts).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")

    sample_weight_0 = SampleWeight(name="sample_weight_0")
    sample_weight_0.get_party_instance(role='guest', party_id=guest).component_param(need_run=True,
                                                                                     class_weight="balanced")
    sample_weight_0.get_party_instance(role='host', party_id=hosts).component_param(need_run=False)

    hetero_lr_0 = HeteroLR(name="hetero_lr_0", optimizer="nesterov_momentum_sgd", tol=0.001,
                               alpha=0.01, max_iter=20, early_stop="weight_diff", batch_size=-1,
                               learning_rate=0.15,
                               init_param={"init_method": "zeros"})

    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary", pos_label=1)
    # evaluation_0.get_party_instance(role='host', party_id=hosts).component_param(need_run=False)

    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(sample_weight_0, data=Data(data=intersection_0.output.data))
    pipeline.add_component(hetero_lr_0, data=Data(train_data=sample_weight_0.output.data))
    pipeline.add_component(evaluation_0, data=Data(data=hetero_lr_0.output.data))

    pipeline.compile()

    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)
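For reference, class_weight="balanced" in sample_weight_0 above derives each class's weight from the label frequencies, in the spirit of the familiar n_samples / (n_classes * n_samples_in_class) rule; a minimal local sketch of that computation, independent of FATE and with made-up labels:

# Illustrative sketch only: the "balanced" heuristic assigns each class a weight of
# n_samples / (n_classes * n_samples_in_class). The labels below are made up.
from collections import Counter

labels = [0, 0, 0, 0, 0, 0, 1, 1]                     # imbalanced toy labels
counts = Counter(labels)
n, k = len(labels), len(counts)
balanced = {c: n / (k * cnt) for c, cnt in counts.items()}
print(balanced)                                       # {0: 0.666..., 1: 2.0}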
Example #2
import json

# imports assumed from the FATE pipeline package; exact module paths may vary by FATE version
from pipeline.backend.pipeline import PipeLine
from pipeline.component import Reader, DataTransform, FeatureScale, SampleWeight, HomoLR, Evaluation
from pipeline.interface import Data
from pipeline.utils.tools import load_job_config

def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    guest_train_data = {
        "name": "breast_homo_guest",
        "namespace": f"experiment_sid{namespace}"
    }
    host_train_data = {
        "name": "breast_homo_host",
        "namespace": f"experiment_sid{namespace}"
    }

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    # configure Reader for guest
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    # configure Reader for host
    reader_0.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)

    # define DataTransform components
    data_transform_0 = DataTransform(
        name="data_transform_0",
        with_match_id=True,
        with_label=True,
        output_format="dense")  # start component numbering at 0

    scale_0 = FeatureScale(name='scale_0')
    sample_weight_0 = SampleWeight(name="sample_weight_0",
                                   class_weight={
                                       "0": 1,
                                       "1": 2
                                   })

    param = {
        "penalty": "L2",
        "optimizer": "sgd",
        "tol": 1e-05,
        "alpha": 0.01,
        "max_iter": 3,
        "early_stop": "diff",
        "batch_size": 320,
        "learning_rate": 0.15,
        "decay": 1.0,
        "decay_sqrt": True,
        "init_param": {
            "init_method": "zeros"
        },
        "encrypt_param": {
            "method": "Paillier"
        },
        "cv_param": {
            "n_splits": 5,
            "shuffle": True,
            "random_seed": 33,
            "need_cv": False
        }
    }
    homo_lr_0 = HomoLR(name='homo_lr_0', **param)
    evaluation_0 = Evaluation(name='evaluation_0')
    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0,
                           data=Data(data=reader_0.output.data))
    # set data input sources of intersection components
    pipeline.add_component(scale_0,
                           data=Data(data=data_transform_0.output.data))
    pipeline.add_component(sample_weight_0,
                           data=Data(data=scale_0.output.data))

    pipeline.add_component(homo_lr_0,
                           data=Data(train_data=sample_weight_0.output.data))
    pipeline.add_component(evaluation_0, data=Data(data=homo_lr_0.output.data))
    evaluation_0.get_party_instance(
        role='host', party_id=host).component_param(need_run=False)

    # compile pipeline once finished adding modules, this step will form conf and dsl files for running job
    pipeline.compile()

    # fit model
    pipeline.fit()
    # query component summary
    print(
        json.dumps(pipeline.get_component("evaluation_0").get_summary(),
                   indent=4,
                   ensure_ascii=False))
Example #3
# imports assumed from the FATE pipeline package; exact module paths may vary by FATE version
from pipeline.backend.pipeline import PipeLine
from pipeline.component import Reader, DataTransform, Intersection, SampleWeight, HeteroLR, Evaluation
from pipeline.interface import Data, Model
from pipeline.utils.tools import load_job_config

def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    guest_train_data = {
        "name": "breast_hetero_guest",
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": "breast_hetero_host",
        "namespace": f"experiment{namespace}"
    }

    pipeline = PipeLine().set_initiator(
        role='guest', party_id=guest).set_roles(guest=guest,
                                                host=host,
                                                arbiter=arbiter)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)

    data_transform_0 = DataTransform(name="data_transform_0")
    data_transform_0.get_party_instance(
        role='guest', party_id=guest).component_param(with_label=True,
                                                      label_name="y",
                                                      label_type="int",
                                                      output_format="dense")
    data_transform_0.get_party_instance(
        role='host', party_id=host).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")

    sample_weight_0 = SampleWeight(name="sample_weight_0")
    sample_weight_0.get_party_instance(
        role='guest', party_id=guest).component_param(need_run=True,
                                                      class_weight="balanced")
    sample_weight_0.get_party_instance(
        role='host', party_id=host).component_param(need_run=False)

    sample_weight_1 = SampleWeight(name="sample_weight_1")

    hetero_lr_0 = HeteroLR(name="hetero_lr_0",
                           optimizer="nesterov_momentum_sgd",
                           tol=0.001,
                           alpha=0.01,
                           max_iter=20,
                           early_stop="weight_diff",
                           batch_size=-1,
                           learning_rate=0.15,
                           init_param={"init_method": "zeros"})

    evaluation_0 = Evaluation(name="evaluation_0",
                              eval_type="binary",
                              pos_label=1)
    # evaluation_0.get_party_instance(role='host', party_id=host).component_param(need_run=False)

    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0,
                           data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0,
                           data=Data(data=data_transform_0.output.data))
    pipeline.add_component(sample_weight_0,
                           data=Data(data=intersection_0.output.data))
    pipeline.add_component(sample_weight_1,
                           data=Data(data=intersection_0.output.data),
                           model=Model(model=sample_weight_0.output.model))
    pipeline.add_component(hetero_lr_0,
                           data=Data(train_data=sample_weight_1.output.data))
    pipeline.add_component(evaluation_0,
                           data=Data(data=hetero_lr_0.output.data))

    pipeline.compile()

    pipeline.fit()

    # predict
    # deploy required components
    pipeline.deploy_component(
        [data_transform_0, intersection_0, sample_weight_0, hetero_lr_0])

    predict_pipeline = PipeLine()
    # add data reader onto predict pipeline
    predict_pipeline.add_component(reader_0)
    # add selected components from train pipeline onto predict pipeline
    # specify data source
    predict_pipeline.add_component(
        pipeline,
        data=Data(predict_input={
            pipeline.data_transform_0.input.data: reader_0.output.data
        }))
    # run predict model
    predict_pipeline.predict()
Example #4
# imports assumed from the FATE pipeline package; exact module paths may vary by FATE version
from pipeline.backend.pipeline import PipeLine
from pipeline.component import Reader, DataTransform, Intersection, SampleWeight, HeteroSSHELinR, Evaluation
from pipeline.interface import Data
from pipeline.utils.tools import load_job_config

def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]

    guest_train_data = {
        "name": "motor_hetero_guest",
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": "motor_hetero_host",
        "namespace": f"experiment{namespace}"
    }

    pipeline = PipeLine().set_initiator(role='guest',
                                        party_id=guest).set_roles(guest=guest,
                                                                  host=host)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)

    data_transform_0 = DataTransform(name="data_transform_0")
    data_transform_0.get_party_instance(
        role='guest', party_id=guest).component_param(with_label=True,
                                                      label_name="motor_speed",
                                                      label_type="float",
                                                      output_format="dense")
    data_transform_0.get_party_instance(
        role='host', party_id=host).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")
    sample_weight_0 = SampleWeight(name="sample_weight_0")
    sample_weight_0.get_party_instance(
        role='guest', party_id=guest).component_param(need_run=True,
                                                      sample_weight_name="pm")
    sample_weight_0.get_party_instance(
        role='host', party_id=host).component_param(need_run=False)
    hetero_linr_0 = HeteroSSHELinR(name="hetero_linr_0",
                                   penalty="L2",
                                   optimizer="rmsprop",
                                   tol=0.001,
                                   alpha=0.01,
                                   max_iter=20,
                                   early_stop="weight_diff",
                                   batch_size=-1,
                                   learning_rate=0.15,
                                   decay=0.0,
                                   decay_sqrt=False,
                                   init_param={"init_method": "zeros"},
                                   reveal_every_iter=True,
                                   reveal_strategy="respectively")

    evaluation_0 = Evaluation(name="evaluation_0",
                              eval_type="regression",
                              pos_label=1)
    # evaluation_0.get_party_instance(role='host', party_id=host).component_param(need_run=False)

    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0,
                           data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0,
                           data=Data(data=data_transform_0.output.data))
    pipeline.add_component(sample_weight_0,
                           data=Data(data=intersection_0.output.data))
    pipeline.add_component(hetero_linr_0,
                           data=Data(train_data=sample_weight_0.output.data))
    pipeline.add_component(evaluation_0,
                           data=Data(data=hetero_linr_0.output.data))

    pipeline.compile()

    pipeline.fit()

    # predict
    # deploy required components
    pipeline.deploy_component(
        [data_transform_0, intersection_0, hetero_linr_0])

    predict_pipeline = PipeLine()
    # add data reader onto predict pipeline
    predict_pipeline.add_component(reader_0)
    # add selected components from train pipeline onto predict pipeline
    # specify data source
    predict_pipeline.add_component(
        pipeline,
        data=Data(predict_input={
            pipeline.data_transform_0.input.data: reader_0.output.data
        }))
    # run predict model
    predict_pipeline.predict()
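In the example above, SampleWeight takes its weights from an existing column (sample_weight_name="pm") instead of from class frequencies. Outside FATE, the same idea corresponds to fitting a regressor with a per-row weight vector; a small scikit-learn sketch with made-up data, for illustration only:

# Illustrative sketch only: per-row weights drawn from a column, as in sample_weight_name="pm".
import numpy as np
from sklearn.linear_model import LinearRegression

rng = np.random.default_rng(0)
X = rng.random((100, 3))
y = X @ np.array([1.0, -2.0, 0.5]) + 0.1 * rng.standard_normal(100)
pm = rng.random(100)                                  # stands in for the "pm" weight column
model = LinearRegression().fit(X, y, sample_weight=pm)
print(model.coef_)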
Example #5
# imports assumed from the FATE pipeline package; exact module paths may vary by FATE version
from pipeline.backend.pipeline import PipeLine
from pipeline.component import Reader, DataIO, Intersection, SampleWeight, FeatureScale
from pipeline.component import HeteroFeatureBinning, HeteroFeatureSelection, HeteroLR, Evaluation
from pipeline.interface import Data, Model
from pipeline.runtime.entity import JobParameters
from pipeline.utils.tools import load_job_config

def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]
    backend = config.backend
    work_mode = config.work_mode

    guest_train_data = {
        "name": "breast_hetero_guest",
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": "breast_hetero_host",
        "namespace": f"experiment{namespace}"
    }

    pipeline = PipeLine().set_initiator(
        role='guest', party_id=guest).set_roles(guest=guest,
                                                host=host,
                                                arbiter=arbiter)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)

    dataio_0 = DataIO(name="dataio_0")
    dataio_0.get_party_instance(role='guest', party_id=guest).component_param(
        with_label=True,
        label_name="y",
        label_type="int",
        output_format="dense")
    dataio_0.get_party_instance(
        role='host', party_id=host).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")

    binning_param = {
        "name": 'hetero_feature_binning_0',
        "method": "quantile",
        "compress_thres": 10000,
        "head_size": 10000,
        "error": 0.001,
        "bin_num": 10,
        "bin_indexes": -1,
        "bin_names": None,
        "category_indexes": None,
        "category_names": None,
        "adjustment_factor": 0.5,
        "local_only": False,
        "transform_param": {
            "transform_cols": -1,
            "transform_names": None,
            "transform_type": "bin_num"
        }
    }

    selection_param = {
        "name": "hetero_feature_selection_0",
        "select_col_indexes": -1,
        "select_names": [],
        "filter_methods": ["iv_value_thres"],
        "iv_value_param": {
            "value_threshold": 0.1
        }
    }
    hetero_feature_binning_0 = HeteroFeatureBinning(**binning_param)

    hetero_feature_selection_0 = HeteroFeatureSelection(**selection_param)

    sample_weight_0 = SampleWeight(name="sample_weight_0")
    sample_weight_0.get_party_instance(
        role='guest', party_id=guest).component_param(need_run=True,
                                                      class_weight={
                                                          "0": 1,
                                                          "1": 2
                                                      })
    sample_weight_0.get_party_instance(
        role='host', party_id=host).component_param(need_run=False)

    feature_scale_0 = FeatureScale(name="feature_scale_0",
                                   method="standard_scale",
                                   need_run=True)

    hetero_lr_0 = HeteroLR(name="hetero_lr_0",
                           optimizer="nesterov_momentum_sgd",
                           tol=0.001,
                           alpha=0.01,
                           max_iter=20,
                           early_stop="weight_diff",
                           batch_size=-1,
                           learning_rate=0.15,
                           init_param={"init_method": "zeros"})

    evaluation_0 = Evaluation(name="evaluation_0",
                              eval_type="binary",
                              pos_label=1)
    # evaluation_0.get_party_instance(role='host', party_id=host).component_param(need_run=False)

    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0,
                           data=Data(data=dataio_0.output.data))
    pipeline.add_component(sample_weight_0,
                           data=Data(data=intersection_0.output.data))
    pipeline.add_component(hetero_feature_binning_0,
                           data=Data(data=sample_weight_0.output.data))
    pipeline.add_component(
        hetero_feature_selection_0,
        data=Data(data=hetero_feature_binning_0.output.data),
        model=Model(isometric_model=[hetero_feature_binning_0.output.model]))
    pipeline.add_component(feature_scale_0,
                           data=Data(data=hetero_feature_selection_0.output.data))
    pipeline.add_component(hetero_lr_0,
                           data=Data(train_data=feature_scale_0.output.data))
    pipeline.add_component(evaluation_0,
                           data=Data(data=hetero_lr_0.output.data))

    pipeline.compile()

    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)
Example #6
# imports assumed from the FATE pipeline package; exact module paths may vary by FATE version
from pipeline.backend.pipeline import PipeLine
from pipeline.component import Reader, DataIO, Intersection, SampleWeight, HeteroLR, LocalBaseline, Evaluation
from pipeline.interface import Data
from pipeline.runtime.entity import JobParameters
from pipeline.utils.tools import load_job_config

def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]
    backend = config.backend
    work_mode = config.work_mode

    guest_train_data = {
        "name": "breast_hetero_guest",
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": "breast_hetero_host",
        "namespace": f"experiment{namespace}"
    }

    pipeline = PipeLine().set_initiator(
        role='guest', party_id=guest).set_roles(guest=guest,
                                                host=host,
                                                arbiter=arbiter)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)

    dataio_0 = DataIO(name="dataio_0")

    dataio_0.get_party_instance(role='guest', party_id=guest).component_param(
        with_label=True,
        output_format="dense",
        label_type="int",
        label_name="y")
    dataio_0.get_party_instance(
        role='host', party_id=host).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0",
                                  intersect_method="rsa",
                                  sync_intersect_ids=True,
                                  only_output_key=False)

    sample_weight_0 = SampleWeight(name="sample_weight_0")
    sample_weight_0.get_party_instance(
        role='guest', party_id=guest).component_param(need_run=True,
                                                      class_weight={
                                                          "0": 1,
                                                          "1": 2
                                                      })
    sample_weight_0.get_party_instance(
        role='host', party_id=host).component_param(need_run=False)

    hetero_lr_0 = HeteroLR(name="hetero_lr_0",
                           penalty="L2",
                           optimizer="nesterov_momentum_sgd",
                           tol=0.0001,
                           alpha=0.0001,
                           max_iter=30,
                           batch_size=-1,
                           early_stop="diff",
                           learning_rate=0.15,
                           init_param={"init_method": "zeros"})

    local_baseline_0 = LocalBaseline(name="local_baseline_0",
                                     model_name="LogisticRegression",
                                     model_opts={
                                         "penalty": "l2",
                                         "tol": 0.0001,
                                         "C": 1.0,
                                         "fit_intercept": True,
                                         "solver": "lbfgs",
                                         "max_iter": 5,
                                         "multi_class": "ovr"
                                     })
    local_baseline_0.get_party_instance(
        role='guest', party_id=guest).component_param(need_run=True)
    local_baseline_0.get_party_instance(
        role='host', party_id=host).component_param(need_run=False)

    evaluation_0 = Evaluation(name="evaluation_0",
                              eval_type="binary",
                              pos_label=1)
    evaluation_0.get_party_instance(
        role='guest', party_id=guest).component_param(need_run=True)
    evaluation_0.get_party_instance(
        role='host', party_id=host).component_param(need_run=False)

    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0,
                           data=Data(data=dataio_0.output.data))
    pipeline.add_component(sample_weight_0,
                           data=Data(data=intersection_0.output.data))
    pipeline.add_component(hetero_lr_0,
                           data=Data(train_data=sample_weight_0.output.data))
    pipeline.add_component(local_baseline_0,
                           data=Data(train_data=sample_weight_0.output.data))
    pipeline.add_component(
        evaluation_0,
        data=Data(
            data=[hetero_lr_0.output.data, local_baseline_0.output.data]))

    pipeline.compile()

    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)
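Each snippet above only defines main(); to run one as a standalone script, FATE's pipeline examples typically wrap it in a small argument parser along these lines (a sketch, not part of the snippets above):

import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser("PIPELINE DEMO")
    parser.add_argument("-config", type=str, help="config file")
    args = parser.parse_args()
    if args.config is not None:
        main(args.config)
    else:
        main()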