Exemple #1
0
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]
    backend = config.backend
    work_mode = config.work_mode

    guest_train_data = {"name": "breast_homo_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "breast_homo_host", "namespace": f"experiment{namespace}"}

    pipeline = PipeLine().set_initiator(role='guest', party_id=guest).set_roles(guest=guest, host=host, arbiter=arbiter)
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    dataio_0 = DataIO(name="dataio_0", with_label=True, output_format="dense",
                      label_type="int", label_name="y")

    homo_lr_0 = HomoLR(name="homo_lr_0", penalty="L2", optimizer="sgd",
                       tol=0.0001, alpha=0.01, max_iter=30, batch_size=-1,
                       early_stop="weight_diff", learning_rate=0.15, init_param={"init_method": "zeros"})

    local_baseline_0 = LocalBaseline(name="local_baseline_0", model_name="LogisticRegression",
                                     model_opts={"penalty": "l2", "tol": 0.0001, "C": 1.0, "fit_intercept": True,
                                                 "solver": "saga", "max_iter": 2})
    local_baseline_0.get_party_instance(role='guest', party_id=guest).component_param(need_run=True)
    local_baseline_0.get_party_instance(role='host', party_id=host).component_param(need_run=False)

    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary", pos_label=1)
    evaluation_0.get_party_instance(role='guest', party_id=guest).component_param(need_run=True)
    evaluation_0.get_party_instance(role='host', party_id=host).component_param(need_run=False)

    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(homo_lr_0, data=Data(train_data=dataio_0.output.data))
    pipeline.add_component(local_baseline_0, data=Data(train_data=dataio_0.output.data))
    pipeline.add_component(evaluation_0, data=Data(data=[homo_lr_0.output.data, local_baseline_0.output.data]))

    pipeline.compile()

    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    guest_train_data = {"name": "breast_homo_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "breast_homo_host", "namespace": f"experiment{namespace}"}

    pipeline = PipeLine().set_initiator(role='guest', party_id=guest).set_roles(guest=guest, host=host, arbiter=arbiter)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    reader_1 = Reader(name="reader_1")
    reader_1.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_1.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    data_transform_0 = DataTransform(name="data_transform_0", with_label=True)
    data_transform_1 = DataTransform(name="data_transform_1")

    federated_sample_0 = FederatedSample(name="federated_sample_0", mode="stratified", method="downsample",
                                         fractions=[[0, 1.0], [1, 1.0]], task_type="h**o")

    homo_binning_0 = HomoFeatureBinning(name='homo_binning_0', sample_bins=10, method="recursive_query")
    homo_binning_1 = HomoFeatureBinning(name='homo_binning_1')

    homo_onehot_0 = HomoOneHotEncoder(name='homo_onehot_0', need_alignment=True)
    homo_onehot_1 = HomoOneHotEncoder(name='homo_onehot_1')

    homo_lr_0 = HomoLR(name="homo_lr_0", penalty="L2", tol=0.0001, alpha=1.0,
                       optimizer="rmsprop", max_iter=5)
    homo_lr_1 = HomoLR(name="homo_lr_1")

    local_baseline_0 = LocalBaseline(name="local_baseline_0", model_name="LogisticRegression",
                                     model_opts={"penalty": "l2", "tol": 0.0001, "C": 1.0, "fit_intercept": True,
                                                 "solver": "lbfgs", "max_iter": 5, "multi_class": "ovr"})
    local_baseline_0.get_party_instance(role='guest', party_id=guest).component_param(need_run=True)
    local_baseline_0.get_party_instance(role='host', party_id=host).component_param(need_run=True)
    local_baseline_1 = LocalBaseline(name="local_baseline_1")

    homo_secureboost_0 = HomoSecureBoost(name="homo_secureboost_0", num_trees=3)
    homo_secureboost_1 = HomoSecureBoost(name="homo_secureboost_1", num_trees=3)

    evaluation_0 = Evaluation(name="evaluation_0")
    evaluation_1 = Evaluation(name="evaluation_1")

    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)

    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(data_transform_1, data=Data(data=reader_1.output.data),
                           model=Model(model=data_transform_0.output.model))

    pipeline.add_component(federated_sample_0, data=Data(data=data_transform_0.output.data))

    pipeline.add_component(homo_binning_0, data=Data(data=federated_sample_0.output.data))
    pipeline.add_component(homo_binning_1, data=Data(data=data_transform_1.output.data),
                           model=Model(model=homo_binning_0.output.model))

    pipeline.add_component(homo_onehot_0, data=Data(data=homo_binning_0.output.data))
    pipeline.add_component(homo_onehot_1, data=Data(data=homo_binning_1.output.data),
                           model=Model(model=homo_onehot_0.output.model))

    pipeline.add_component(homo_lr_0, data=Data(data=homo_onehot_0.output.data))
    pipeline.add_component(homo_lr_1, data=Data(data=homo_onehot_1.output.data),
                           model=Model(model=homo_lr_0.output.model))

    pipeline.add_component(local_baseline_0, data=Data(data=homo_onehot_0.output.data))
    pipeline.add_component(local_baseline_1, data=Data(data=homo_onehot_1.output.data),
                           model=Model(model=local_baseline_0.output.model))

    pipeline.add_component(homo_secureboost_0, data=Data(data=homo_onehot_0.output.data))
    pipeline.add_component(homo_secureboost_1, data=Data(data=homo_onehot_1.output.data),
                           model=Model(model=homo_secureboost_0.output.model))

    pipeline.add_component(evaluation_0,
                           data=Data(
                               data=[homo_lr_0.output.data, homo_lr_1.output.data,
                                     local_baseline_0.output.data, local_baseline_1.output.data]))
    pipeline.add_component(evaluation_1,
                           data=Data(
                               data=[homo_secureboost_0.output.data, homo_secureboost_1.output.data]))

    pipeline.compile()

    pipeline.fit()

    print(pipeline.get_component("evaluation_0").get_summary())
    print(pipeline.get_component("evaluation_1").get_summary())
Exemple #3
0
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    guest_train_data = {
        "name": "breast_homo_guest",
        "namespace": f"experiment_sid{namespace}"
    }
    host_train_data = {
        "name": "breast_homo_host",
        "namespace": f"experiment_sid{namespace}"
    }

    guest_eval_data = {
        "name": "breast_homo_guest",
        "namespace": f"experiment_sid{namespace}"
    }
    host_eval_data = {
        "name": "breast_homo_host",
        "namespace": f"experiment_sid{namespace}"
    }

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    # configure Reader for guest
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    # configure Reader for host
    reader_0.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)

    reader_1 = Reader(name="reader_1")
    reader_1.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_eval_data)
    reader_1.get_party_instance(
        role='host', party_id=host).component_param(table=host_eval_data)
    # define DataTransform components
    data_transform_0 = DataTransform(name="data_transform_0",
                                     with_match_id=True,
                                     with_label=True,
                                     output_format="dense")
    data_transform_1 = DataTransform(
        name="data_transform_1")  # start component numbering at 0

    scale_0 = FeatureScale(name='scale_0')
    scale_1 = FeatureScale(name='scale_1')

    param = {
        "penalty": "L2",
        "optimizer": "sgd",
        "tol": 1e-05,
        "alpha": 0.01,
        "max_iter": 3,
        "early_stop": "diff",
        "batch_size": 320,
        "learning_rate": 0.15,
        "validation_freqs": 1,
        "init_param": {
            "init_method": "zeros"
        },
        "encrypt_param": {
            "method": None
        },
        "cv_param": {
            "n_splits": 4,
            "shuffle": True,
            "random_seed": 33,
            "need_cv": False
        }
    }

    homo_lr_0 = HomoLR(name='homo_lr_0', **param)

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)

    pipeline.add_component(data_transform_0,
                           data=Data(data=reader_0.output.data))
    pipeline.add_component(data_transform_1,
                           data=Data(data=reader_1.output.data),
                           model=Model(data_transform_0.output.model))

    # set data input sources of intersection components
    pipeline.add_component(scale_0,
                           data=Data(data=data_transform_0.output.data))
    pipeline.add_component(scale_1,
                           data=Data(data=data_transform_1.output.data),
                           model=Model(scale_0.output.model))

    pipeline.add_component(homo_lr_0,
                           data=Data(train_data=scale_0.output.data,
                                     validate_data=scale_1.output.data))
    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")
    evaluation_0.get_party_instance(
        role='host', party_id=host).component_param(need_run=False)
    pipeline.add_component(evaluation_0, data=Data(data=homo_lr_0.output.data))

    # compile pipeline once finished adding modules, this step will form conf and dsl files for running job
    pipeline.compile()

    # fit model
    pipeline.fit()
    # query component summary
    print(
        json.dumps(pipeline.get_component("homo_lr_0").get_summary(),
                   indent=4,
                   ensure_ascii=False))
    print(
        json.dumps(pipeline.get_component("evaluation_0").get_summary(),
                   indent=4,
                   ensure_ascii=False))
Exemple #4
0
def main(config="../../config.yaml",
         param="./breast_lr_config.yaml",
         namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]
    backend = config.backend
    work_mode = config.work_mode

    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    assert isinstance(param, dict)
    """
    guest = 9999
    host = 10000
    arbiter = 9999
    backend = 0
    work_mode = 1
    param = {"penalty": "L2", "max_iter": 5}
    """
    data_set = param.get("data_guest").split('/')[-1]
    if data_set == "default_credit_homo_guest.csv":
        guest_data_table = 'default_credit_guest'
        host_data_table = 'default_credit_host1'

    elif data_set == 'breast_homo_guest.csv':
        guest_data_table = 'breast_homo_guest'
        host_data_table = 'breast_homo_host'

    elif data_set == 'give_credit_homo_guest.csv':
        guest_data_table = 'give_credit_homo_guest'
        host_data_table = 'give_credit_homo_host'

    elif data_set == 'epsilon_5k_homo_guest.csv':
        guest_data_table = 'epsilon_5k_homo_guest'
        host_data_table = 'epsilon_5k_homo_host'

    else:
        raise ValueError(f"Cannot recognized data_set: {data_set}")

    guest_train_data = {
        "name": guest_data_table,
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": host_data_table,
        "namespace": f"experiment{namespace}"
    }
    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    # configure Reader for guest
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    # configure Reader for host
    reader_0.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)

    # define DataIO components
    dataio_0 = DataIO(name="dataio_0")  # start component numbering at 0

    # get DataIO party instance of guest
    dataio_0_guest_party_instance = dataio_0.get_party_instance(role='guest',
                                                                party_id=guest)
    # configure DataIO for guest
    dataio_0_guest_party_instance.component_param(with_label=True,
                                                  output_format="dense")
    # get and configure DataIO party instance of host
    dataio_0.get_party_instance(role='host',
                                party_id=host).component_param(with_label=True)

    lr_param = {}

    config_param = {
        "penalty": param["penalty"],
        "max_iter": param["max_iter"],
        "alpha": param["alpha"],
        "learning_rate": param["learning_rate"],
        "optimizer": param.get("optimizer", "sgd"),
        "batch_size": param.get("batch_size", -1),
        "init_param": {
            "init_method": param.get("init_method", 'random_uniform')
        },
        "encrypt_param": {
            "method": None
        }
    }
    lr_param.update(config_param)
    print(f"lr_param: {lr_param}, data_set: {data_set}")
    homo_lr_0 = HomoLR(name='homo_lr_0', **lr_param)
    homo_lr_1 = HomoLR(name='homo_lr_1')

    evaluation_0 = Evaluation(name='evaluation_0', eval_type="binary")
    evaluation_0.get_party_instance(
        role='host', party_id=host).component_param(need_run=False)

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(homo_lr_0,
                           data=Data(train_data=dataio_0.output.data))
    pipeline.add_component(homo_lr_1,
                           data=Data(test_data=dataio_0.output.data),
                           model=Model(homo_lr_0.output.model))
    pipeline.add_component(evaluation_0, data=Data(data=homo_lr_0.output.data))

    # compile pipeline once finished adding modules, this step will form conf and dsl files for running job
    pipeline.compile()

    # fit model
    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)
    # query component summary
    data_summary = {
        "train": {
            "guest": guest_train_data["name"],
            "host": host_train_data["name"]
        },
        "test": {
            "guest": guest_train_data["name"],
            "host": host_train_data["name"]
        }
    }
    result_summary = parse_summary_result(
        pipeline.get_component("evaluation_0").get_summary())
    lr_0_data = pipeline.get_component("homo_lr_0").get_output_data().get(
        "data")
    lr_1_data = pipeline.get_component("homo_lr_1").get_output_data().get(
        "data")
    lr_0_score = extract_data(lr_0_data, "predict_result")
    lr_0_label = extract_data(lr_0_data, "label")
    lr_1_score = extract_data(lr_1_data, "predict_result")
    lr_1_label = extract_data(lr_1_data, "label")
    lr_0_score_label = extract_data(lr_0_data, "predict_result", keep_id=True)
    lr_1_score_label = extract_data(lr_1_data, "predict_result", keep_id=True)
    metric_lr = {
        "score_diversity_ratio":
        classification_metric.Distribution.compute(lr_0_score_label,
                                                   lr_1_score_label),
        "ks_2samp":
        classification_metric.KSTest.compute(lr_0_score, lr_1_score),
        "mAP_D_value":
        classification_metric.AveragePrecisionScore().compute(
            lr_0_score, lr_1_score, lr_0_label, lr_1_label)
    }
    result_summary["distribution_metrics"] = {"homo_lr": metric_lr}

    return data_summary, result_summary
def main(config="../../config.yaml", namespace=""):
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    hosts = parties.host[0]
    arbiter = parties.arbiter[0]
    guest_train_data = {
        "name": "breast_homo_guest",
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": "breast_homo_host",
        "namespace": f"experiment{namespace}"
    }

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=hosts, arbiter=arbiter)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    # configure Reader for guest
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    # configure Reader for host
    reader_0.get_party_instance(
        role='host', party_id=hosts).component_param(table=host_train_data)

    data_transform_0 = DataTransform(name="data_transform_0",
                                     output_format='dense',
                                     with_label=True)

    pipeline.add_component(reader_0)

    pipeline.add_component(data_transform_0,
                           data=Data(data=reader_0.output.data))

    lr_param = {
        "penalty": "L2",
        "optimizer": "sgd",
        "tol": 1e-05,
        "alpha": 0.01,
        "early_stop": "diff",
        "batch_size": -1,
        "learning_rate": 0.15,
        "decay": 1,
        "decay_sqrt": True,
        "init_param": {
            "init_method": "zeros"
        },
        "encrypt_param": {
            "method": None
        },
        "cv_param": {
            "n_splits": 4,
            "shuffle": True,
            "random_seed": 33,
            "need_cv": False
        },
        "callback_param": {
            "callbacks": ["ModelCheckpoint", "EarlyStopping"]
        }
    }

    homo_lr_0 = HomoLR(name="homo_lr_0", max_iter=3, **lr_param)
    homo_lr_1 = HomoLR(name="homo_lr_1", max_iter=30, **lr_param)

    homo_lr_2 = HomoLR(name="homo_lr_2", max_iter=30, **lr_param)

    pipeline.add_component(homo_lr_0,
                           data=Data(train_data=data_transform_0.output.data))
    pipeline.add_component(homo_lr_1,
                           data=Data(train_data=data_transform_0.output.data),
                           model=Model(model=homo_lr_0.output.model))
    pipeline.add_component(homo_lr_2,
                           data=Data(train_data=data_transform_0.output.data))

    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")
    pipeline.add_component(
        evaluation_0,
        data=Data(data=[homo_lr_1.output.data, homo_lr_2.output.data]))

    pipeline.compile()

    # fit model
    pipeline.fit()
    # query component summary
    prettify(pipeline.get_component("evaluation_0").get_summary())
    return pipeline
Exemple #6
0
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    guest_train_data = {
        "name": "breast_homo_guest",
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": "breast_homo_host",
        "namespace": f"experiment{namespace}"
    }

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    # configure Reader for guest
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    # configure Reader for host
    reader_0.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)

    # define DataTransform components
    data_transform_0 = DataTransform(
        name="data_transform_0", with_label=True,
        output_format="dense")  # start component numbering at 0

    scale_0 = FeatureScale(name='scale_0')
    param = {
        "penalty": "L2",
        "optimizer": "sgd",
        "tol": 1e-05,
        "alpha": 0.01,
        "max_iter": 30,
        "early_stop": "diff",
        "batch_size": -1,
        "learning_rate": 0.15,
        "decay": 1,
        "decay_sqrt": True,
        "init_param": {
            "init_method": "zeros"
        },
        "encrypt_param": {
            "method": None
        },
        "cv_param": {
            "n_splits": 4,
            "shuffle": True,
            "random_seed": 33,
            "need_cv": False
        }
    }

    homo_lr_0 = HomoLR(name='homo_lr_0', **param)

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0,
                           data=Data(data=reader_0.output.data))
    # set data input sources of intersection components
    pipeline.add_component(scale_0,
                           data=Data(data=data_transform_0.output.data))
    pipeline.add_component(homo_lr_0,
                           data=Data(train_data=scale_0.output.data))
    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")
    evaluation_0.get_party_instance(
        role='host', party_id=host).component_param(need_run=False)
    pipeline.add_component(evaluation_0, data=Data(data=homo_lr_0.output.data))

    # compile pipeline once finished adding modules, this step will form conf and dsl files for running job
    pipeline.compile()

    # fit model
    pipeline.fit()

    deploy_components = [data_transform_0, scale_0, homo_lr_0]
    pipeline.deploy_component(components=deploy_components)
    #
    predict_pipeline = PipeLine()
    # # add data reader onto predict pipeline
    predict_pipeline.add_component(reader_0)
    # # add selected components from train pipeline onto predict pipeline
    # # specify data source
    predict_pipeline.add_component(
        pipeline,
        data=Data(predict_input={
            pipeline.data_transform_0.input.data: reader_0.output.data
        }))
    predict_pipeline.compile()
    predict_pipeline.predict()

    dsl_json = predict_pipeline.get_predict_dsl()
    conf_json = predict_pipeline.get_predict_conf()
    # import json
    json.dump(dsl_json,
              open('./h**o-lr-normal-predict-dsl.json', 'w'),
              indent=4)
    json.dump(conf_json,
              open('./h**o-lr-normal-predict-conf.json', 'w'),
              indent=4)

    # query component summary
    print(
        json.dumps(pipeline.get_component("homo_lr_0").get_summary(),
                   indent=4,
                   ensure_ascii=False))
    print(
        json.dumps(pipeline.get_component("evaluation_0").get_summary(),
                   indent=4,
                   ensure_ascii=False))
Exemple #7
0
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    guest_train_data = {
        "name": "mock_string",
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": "mock_string",
        "namespace": f"experiment{namespace}"
    }

    guest_eval_data = {
        "name": "mock_string",
        "namespace": f"experiment{namespace}"
    }
    host_eval_data = {
        "name": "mock_string",
        "namespace": f"experiment{namespace}"
    }

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    # configure Reader for guest
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    # configure Reader for host
    reader_0.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)

    reader_1 = Reader(name="reader_1")
    reader_1.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_eval_data)
    reader_1.get_party_instance(
        role='host', party_id=host).component_param(table=host_eval_data)

    # define DataTransform components
    data_transform_0 = DataTransform(
        name="data_transform_0",
        with_label=True,
        output_format="dense",
        label_name='y',
        data_type="str")  # start component numbering at 0
    data_transform_1 = DataTransform(name="data_transform_1")

    homo_onehot_param = {
        "transform_col_indexes": -1,
        "transform_col_names": [],
        "need_alignment": True
    }

    homo_onehot_0 = HomoOneHotEncoder(name='homo_onehot_0',
                                      **homo_onehot_param)
    homo_onehot_1 = HomoOneHotEncoder(name='homo_onehot_1')

    scale_0 = FeatureScale(name='scale_0', method="standard_scale")
    scale_1 = FeatureScale(name='scale_1')

    homo_lr_param = {
        "penalty": "L2",
        "optimizer": "sgd",
        "tol": 1e-05,
        "alpha": 0.01,
        "max_iter": 3,
        "early_stop": "diff",
        "batch_size": 500,
        "learning_rate": 0.15,
        "decay": 1,
        "decay_sqrt": True,
        "init_param": {
            "init_method": "zeros"
        },
        "encrypt_param": {
            "method": "Paillier"
        },
        "cv_param": {
            "n_splits": 4,
            "shuffle": True,
            "random_seed": 33,
            "need_cv": False
        }
    }

    homo_lr_0 = HomoLR(name='homo_lr_0', **homo_lr_param)
    homo_lr_1 = HomoLR(name='homo_lr_1')

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(data_transform_0,
                           data=Data(data=reader_0.output.data))
    # set data_transform_1 to replicate model from data_transform_0
    pipeline.add_component(data_transform_1,
                           data=Data(data=reader_1.output.data),
                           model=Model(data_transform_0.output.model))

    pipeline.add_component(homo_onehot_0,
                           data=Data(data=data_transform_0.output.data))
    pipeline.add_component(homo_onehot_1,
                           data=Data(data=data_transform_1.output.data),
                           model=Model(homo_onehot_0.output.model))
    pipeline.add_component(scale_0, data=Data(data=homo_onehot_0.output.data))
    pipeline.add_component(scale_1,
                           data=Data(data=homo_onehot_1.output.data),
                           model=Model(scale_0.output.model))
    pipeline.add_component(homo_lr_0,
                           data=Data(train_data=scale_0.output.data))
    pipeline.add_component(homo_lr_1,
                           data=Data(test_data=scale_1.output.data),
                           model=Model(homo_lr_0.output.model))
    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")
    evaluation_0.get_party_instance(
        role='host', party_id=host).component_param(need_run=False)
    pipeline.add_component(
        evaluation_0,
        data=Data(data=[homo_lr_0.output.data, homo_lr_1.output.data]))
    pipeline.compile()

    # fit model
    pipeline.fit()
    # query component summary
    print(
        json.dumps(pipeline.get_component("homo_lr_0").get_summary(),
                   indent=4,
                   ensure_ascii=False))
    print(
        json.dumps(pipeline.get_component("evaluation_0").get_summary(),
                   indent=4,
                   ensure_ascii=False))
Exemple #8
0
def make_normal_dsl(config, namespace):
    parties = config.parties
    guest = parties.guest[0]
    hosts = parties.host[0]
    arbiter = parties.arbiter[0]
    guest_train_data = {
        "name": "breast_homo_guest",
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": "breast_homo_host",
        "namespace": f"experiment{namespace}"
    }

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=hosts, arbiter=arbiter)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    # configure Reader for guest
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    # configure Reader for host
    reader_0.get_party_instance(
        role='host', party_id=hosts).component_param(table=host_train_data)

    # define DataTransform components
    data_transform_0 = DataTransform(
        name="data_transform_0")  # start component numbering at 0

    # get DataTransform party instance of guest
    data_transform_0_guest_party_instance = data_transform_0.get_party_instance(
        role='guest', party_id=guest)
    # configure DataTransform for guest
    data_transform_0_guest_party_instance.component_param(
        with_label=True, output_format="dense")
    # get and configure DataTransform party instance of host
    data_transform_0.get_party_instance(
        role='host', party_id=hosts).component_param(with_label=True)

    scale_0 = FeatureScale(name='scale_0')

    homo_sbt_0 = HomoSecureBoost(
        name="homo_secureboost_0",
        num_trees=3,
        task_type='classification',
        objective_param={"objective": "cross_entropy"},
        tree_param={"max_depth": 3},
        validation_freqs=1)

    # define Intersection components
    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0,
                           data=Data(data=reader_0.output.data))
    pipeline.add_component(scale_0,
                           data=Data(data=data_transform_0.output.data))
    pipeline.add_component(homo_sbt_0,
                           data=Data(train_data=scale_0.output.data))

    selection_param = {
        "name": "hetero_feature_selection_0",
        "select_col_indexes": -1,
        "select_names": [],
        "filter_methods": ["homo_sbt_filter"],
        "sbt_param": {
            "metrics": "feature_importance",
            "filter_type": "threshold",
            "take_high": True,
            "threshold": 0.03
        }
    }
    feature_selection_0 = HeteroFeatureSelection(**selection_param)
    param = {
        "penalty": "L2",
        "optimizer": "sgd",
        "tol": 1e-05,
        "alpha": 0.01,
        "max_iter": 30,
        "early_stop": "diff",
        "batch_size": -1,
        "learning_rate": 0.15,
        "decay": 1,
        "decay_sqrt": True,
        "init_param": {
            "init_method": "zeros"
        },
        "encrypt_param": {
            "method": None
        },
        "cv_param": {
            "n_splits": 4,
            "shuffle": True,
            "random_seed": 33,
            "need_cv": False
        }
    }

    homo_lr_0 = HomoLR(name='homo_lr_0', **param)
    pipeline.add_component(
        feature_selection_0,
        data=Data(data=scale_0.output.data),
        model=Model(isometric_model=homo_sbt_0.output.model))
    pipeline.add_component(
        homo_lr_0, data=Data(train_data=feature_selection_0.output.data))
    evaluation_0 = Evaluation(name='evaluation_0')
    pipeline.add_component(evaluation_0, data=Data(data=homo_lr_0.output.data))
    # compile pipeline once finished adding modules, this step will form conf and dsl files for running job
    pipeline.compile()
    return pipeline