Code example #1
0
def main(config="../../config.yaml", namespace=""):
    """Train a hetero logistic regression alongside a guest-local baseline
    model, evaluate both jointly, then rerun the trained pipeline in
    predict mode on the same data.

    Parameters
    ----------
    config : str or config object
        Path to a job-config YAML (loaded via ``load_job_config``) or an
        already-parsed config object.
    namespace : str
        Suffix appended to the data namespace, to isolate test data sets.
    """
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]
    backend = config.backend
    work_mode = config.work_mode

    # Vertically-partitioned multi-class data set; the guest side holds labels.
    guest_train_data = {
        "name": "vehicle_scale_hetero_guest",
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": "vehicle_scale_hetero_host",
        "namespace": f"experiment{namespace}"
    }

    # Guest initiates the job; guest, host and arbiter all participate.
    pipeline = PipeLine().set_initiator(
        role='guest', party_id=guest).set_roles(guest=guest,
                                                host=host,
                                                arbiter=arbiter)

    # Each party reads its own table.
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)

    dataio_0 = DataIO(name="dataio_0")

    # Only the guest side carries the label column "y" (integer classes).
    dataio_0.get_party_instance(role='guest', party_id=guest).component_param(
        with_label=True,
        output_format="dense",
        label_type="int",
        label_name="y")
    dataio_0.get_party_instance(
        role='host', party_id=host).component_param(with_label=False)

    # RSA private-set-intersection aligns sample IDs across the two parties.
    intersection_0 = Intersection(name="intersection_0",
                                  intersect_method="rsa",
                                  sync_intersect_ids=True,
                                  only_output_key=False)
    hetero_lr_0 = HeteroLR(name="hetero_lr_0",
                           penalty="L2",
                           optimizer="nesterov_momentum_sgd",
                           tol=0.0001,
                           alpha=0.0001,
                           max_iter=30,
                           batch_size=-1,
                           early_stop="diff",
                           learning_rate=0.15,
                           init_param={"init_method": "zeros"})

    # Guest-local (non-federated) LogisticRegression used as a reference
    # point; it is disabled on the host via need_run=False below.
    local_baseline_0 = LocalBaseline(name="local_baseline_0",
                                     model_name="LogisticRegression",
                                     model_opts={
                                         "penalty": "l2",
                                         "tol": 0.0001,
                                         "C": 1.0,
                                         "fit_intercept": True,
                                         "solver": "lbfgs",
                                         "max_iter": 5,
                                         "multi_class": "ovr"
                                     })
    local_baseline_0.get_party_instance(
        role='guest', party_id=guest).component_param(need_run=True)
    local_baseline_0.get_party_instance(
        role='host', party_id=host).component_param(need_run=False)

    # Evaluate both models' outputs together, guest side only.
    evaluation_0 = Evaluation(name="evaluation_0",
                              eval_type="multi",
                              pos_label=1)
    evaluation_0.get_party_instance(
        role='guest', party_id=guest).component_param(need_run=True)
    evaluation_0.get_party_instance(
        role='host', party_id=host).component_param(need_run=False)

    # Wire components in execution order:
    # read -> parse -> intersect -> (federated LR | local baseline) -> eval.
    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0,
                           data=Data(data=dataio_0.output.data))
    pipeline.add_component(hetero_lr_0,
                           data=Data(train_data=intersection_0.output.data))
    pipeline.add_component(local_baseline_0,
                           data=Data(train_data=intersection_0.output.data))
    pipeline.add_component(
        evaluation_0,
        data=Data(
            data=[hetero_lr_0.output.data, local_baseline_0.output.data]))

    pipeline.compile()

    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)

    # predict
    # Select the trained components to reuse at predict time (reader_0 is
    # re-added fresh; evaluation is re-attached below).
    pipeline.deploy_component(
        [dataio_0, intersection_0, hetero_lr_0, local_baseline_0])

    predict_pipeline = PipeLine()
    predict_pipeline.add_component(reader_0)
    # `pipeline.dataio_0` resolves the deployed component by its name; the
    # new reader output is fed into the deployed sub-pipeline's entry point.
    predict_pipeline.add_component(
        pipeline,
        data=Data(
            predict_input={pipeline.dataio_0.input.data: reader_0.output.data
                           }))
    predict_pipeline.add_component(
        evaluation_0,
        data=Data(
            data=[hetero_lr_0.output.data, local_baseline_0.output.data]))
    predict_pipeline.predict(job_parameters)
Code example #2
0
def main(config="../../config.yaml", namespace=""):
    """Run a homo (horizontal) logistic-regression job with feature scaling.

    Builds a pipeline that reads train and eval tables for guest and host,
    transforms and scales them, trains HomoLR with early stopping against
    the eval set, evaluates on the guest side, and prints the component
    summaries as JSON.

    Parameters
    ----------
    config : str or config object
        Path to a job-config YAML (loaded via ``load_job_config``) or an
        already-parsed config object.
    namespace : str
        Suffix appended to the data namespace, to isolate test data sets.
    """
    # Resolve the job configuration and the participating parties.
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    # All four tables live in the same experiment namespace.
    def _table(name):
        return {"name": name, "namespace": f"experiment{namespace}"}

    guest_train_data = _table("breast_homo_guest")
    host_train_data = _table("breast_homo_host")
    guest_eval_data = _table("breast_homo_guest")
    host_eval_data = _table("breast_homo_host")

    # Guest initiates; guest, host and arbiter participate.
    pipeline = (PipeLine()
                .set_initiator(role='guest', party_id=guest)
                .set_roles(guest=guest, host=host, arbiter=arbiter))

    # One reader per data set; each party reads its own table.
    reader_0 = Reader(name="reader_0")
    reader_1 = Reader(name="reader_1")
    reader_tables = ((reader_0, guest_train_data, host_train_data),
                     (reader_1, guest_eval_data, host_eval_data))
    for reader, guest_table, host_table in reader_tables:
        reader.get_party_instance(
            role='guest', party_id=guest).component_param(table=guest_table)
        reader.get_party_instance(
            role='host', party_id=host).component_param(table=host_table)

    # data_transform_1 inherits its model from data_transform_0 below,
    # so only the first transform needs explicit parsing parameters.
    data_transform_0 = DataTransform(name="data_transform_0",
                                     with_label=True,
                                     output_format="dense")
    data_transform_1 = DataTransform(name="data_transform_1")

    scale_0 = FeatureScale(name='scale_0')
    scale_1 = FeatureScale(name='scale_1')

    # HomoLR hyper-parameters: plain SGD, early stopping on the eval set,
    # no encryption, cross-validation configured but disabled.
    homo_lr_0 = HomoLR(
        name='homo_lr_0',
        penalty="L2",
        optimizer="sgd",
        tol=1e-05,
        alpha=0.01,
        max_iter=3,
        early_stop="diff",
        batch_size=320,
        learning_rate=0.15,
        callback_param={
            "callbacks": ["EarlyStopping"],
            "validation_freqs": 1
        },
        init_param={"init_method": "zeros"},
        encrypt_param={"method": None},
        cv_param={
            "n_splits": 4,
            "shuffle": True,
            "random_seed": 33,
            "need_cv": False
        })

    # Evaluation runs only on the guest, which holds the labels.
    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")
    evaluation_0.get_party_instance(
        role='host', party_id=host).component_param(need_run=False)

    # Wire components in execution order; eval-set components reuse the
    # models fitted on the train set.
    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(data_transform_0,
                           data=Data(data=reader_0.output.data))
    pipeline.add_component(data_transform_1,
                           data=Data(data=reader_1.output.data),
                           model=Model(data_transform_0.output.model))
    pipeline.add_component(scale_0,
                           data=Data(data=data_transform_0.output.data))
    pipeline.add_component(scale_1,
                           data=Data(data=data_transform_1.output.data),
                           model=Model(scale_0.output.model))
    pipeline.add_component(homo_lr_0,
                           data=Data(train_data=scale_0.output.data,
                                     validate_data=scale_1.output.data))
    pipeline.add_component(evaluation_0, data=Data(data=homo_lr_0.output.data))

    # Compile (forms conf/dsl for the job) and run.
    pipeline.compile()
    pipeline.fit()

    # Print the training and evaluation summaries.
    for component_name in ("homo_lr_0", "evaluation_0"):
        print(
            json.dumps(pipeline.get_component(component_name).get_summary(),
                       indent=4,
                       ensure_ascii=False))
Code example #3
0
File: fate-hetero_nn.py  Project: yubo1993/FATE
def main(config="../../config.yaml", param="./hetero_nn_breast_config.yaml", namespace=""):
    """Benchmark a hetero neural network: train ``hetero_nn_0``, re-score
    the same data in test mode with ``hetero_nn_1``, evaluate, and return
    ``(data_summary, metric_summary)`` for the benchmark-quality suite.

    ``param`` supplies table names, layer sizes, optimizer/loss/metric
    settings used below (loaded from YAML when given as a path).
    """
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)

    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]

    guest_train_data = {"name": param["guest_table_name"], "namespace": f"experiment{namespace}"}
    host_train_data = {"name": param["host_table_name"], "namespace": f"experiment{namespace}"}

    pipeline = PipeLine().set_initiator(role='guest', party_id=guest).set_roles(guest=guest, host=host)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    # Only the guest side carries labels.
    dataio_0 = DataIO(name="dataio_0")
    dataio_0.get_party_instance(role='guest', party_id=guest).component_param(with_label=True)
    dataio_0.get_party_instance(role='host', party_id=host).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")

    # Bottom model, one interactive layer, and a top model; fixed-seed
    # uniform initializers keep runs reproducible.
    # NOTE(review): bottom-model input_shape is hard-coded to 10 features —
    # confirm it matches the configured tables.
    hetero_nn_0 = HeteroNN(name="hetero_nn_0", epochs=param["epochs"],
                           interactive_layer_lr=param["learning_rate"], batch_size=param["batch_size"],
                           early_stop="diff")
    hetero_nn_0.add_bottom_model(Dense(units=param["bottom_layer_units"], input_shape=(10,), activation="tanh",
                                       kernel_initializer=initializers.RandomUniform(minval=-1, maxval=1, seed=123)))
    # NOTE(review): "interactve" spelling presumably matches the FATE API
    # method name — do not "fix" without checking the installed version.
    hetero_nn_0.set_interactve_layer(
        Dense(units=param["interactive_layer_units"], input_shape=(param["bottom_layer_units"],), activation="relu",
              kernel_initializer=initializers.RandomUniform(minval=-1, maxval=1, seed=123)))
    hetero_nn_0.add_top_model(
        Dense(units=param["top_layer_units"], input_shape=(param["interactive_layer_units"],),
              activation=param["top_act"],
              kernel_initializer=initializers.RandomUniform(minval=-1, maxval=1, seed=123)))
    opt = getattr(optimizers, param["opt"])(lr=param["learning_rate"])
    hetero_nn_0.compile(optimizer=opt, metrics=param["metrics"],
                        loss=param["loss"])
    # hetero_nn_1 reuses hetero_nn_0's trained model in test mode (see the
    # model=Model(...) wiring below).
    hetero_nn_1 = HeteroNN(name="hetero_nn_1")

    # Multi-class when trained with categorical cross-entropy, else binary.
    if param["loss"] == "categorical_crossentropy":
        eval_type = "multi"
    else:
        eval_type = "binary"

    evaluation_0 = Evaluation(name="evaluation_0", eval_type=eval_type)

    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(hetero_nn_0, data=Data(train_data=intersection_0.output.data))
    pipeline.add_component(hetero_nn_1, data=Data(test_data=intersection_0.output.data),
                           model=Model(hetero_nn_0.output.model))
    pipeline.add_component(evaluation_0, data=Data(data=hetero_nn_0.output.data))

    pipeline.compile()

    pipeline.fit()

    # Pull per-sample predictions from both NN components and compare the
    # train-mode and test-mode outputs with distribution-level metrics.
    nn_0_data = pipeline.get_component("hetero_nn_0").get_output_data().get("data")
    nn_1_data = pipeline.get_component("hetero_nn_1").get_output_data().get("data")
    nn_0_score = extract_data(nn_0_data, "predict_result")
    nn_0_label = extract_data(nn_0_data, "label")
    nn_1_score = extract_data(nn_1_data, "predict_result")
    nn_1_label = extract_data(nn_1_data, "label")
    nn_0_score_label = extract_data(nn_0_data, "predict_result", keep_id=True)
    nn_1_score_label = extract_data(nn_1_data, "predict_result", keep_id=True)
    metric_summary = parse_summary_result(pipeline.get_component("evaluation_0").get_summary())
    if eval_type == "binary":
        metric_nn = {
            "score_diversity_ratio": classification_metric.Distribution.compute(nn_0_score_label, nn_1_score_label),
            "ks_2samp": classification_metric.KSTest.compute(nn_0_score, nn_1_score),
            "mAP_D_value": classification_metric.AveragePrecisionScore().compute(nn_0_score, nn_1_score, nn_0_label,
                                                                                 nn_1_label)}
        metric_summary["distribution_metrics"] = {"hetero_nn": metric_nn}
    elif eval_type == "multi":
        metric_nn = {
            "score_diversity_ratio": classification_metric.Distribution.compute(nn_0_score_label, nn_1_score_label)}
        metric_summary["distribution_metrics"] = {"hetero_nn": metric_nn}

    # Train and "test" use the same tables here (self-consistency check).
    data_summary = {"train": {"guest": guest_train_data["name"], "host": host_train_data["name"]},
                    "test": {"guest": guest_train_data["name"], "host": host_train_data["name"]}
                    }
    return data_summary, metric_summary
Code example #4
0
File: pipeline-lr-multi.py  Project: FederatedAI/FATE
def main(config="../../config.yaml", param="./vehicle_config.yaml", namespace=""):
    """Benchmark hetero LR (multi-class) on the vehicle_scale data set.

    Trains ``hetero_lr_0``, re-scores the same intersected data in test
    mode with ``hetero_lr_1`` (reusing the trained model), evaluates, and
    returns ``(data_summary, result_summary)`` for the benchmark suite.

    Parameters
    ----------
    config : str or config object
        Path to the job-config YAML (loaded via ``load_job_config``) or an
        already-parsed config object.
    param : str or dict
        Path to the benchmark parameter YAML, or an already-loaded dict.
    namespace : str
        Suffix appended to the data namespace, to isolate test data sets.
    """
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    assert isinstance(param, dict)
    # Only the vehicle_scale data set is supported by this benchmark script;
    # the table names are derived from the guest data file's basename.
    data_set = param.get("data_guest").split('/')[-1]
    if data_set == "vehicle_scale_hetero_guest.csv":
        guest_data_table = 'vehicle_scale_hetero_guest'
        host_data_table = 'vehicle_scale_hetero_host'
    else:
        raise ValueError(f"Cannot recognize data_set: {data_set}")

    guest_train_data = {"name": guest_data_table, "namespace": f"experiment{namespace}"}
    host_train_data = {"name": host_data_table, "namespace": f"experiment{namespace}"}

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    # configure Reader for guest
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    # configure Reader for host
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    # define DataTransform components
    data_transform_0 = DataTransform(name="data_transform_0")  # start component numbering at 0

    # get DataTransform party instance of guest
    data_transform_0_guest_party_instance = data_transform_0.get_party_instance(role='guest', party_id=guest)
    # configure DataTransform for guest (only the guest carries labels)
    data_transform_0_guest_party_instance.component_param(with_label=True, output_format="dense")
    # get and configure DataTransform party instance of host
    data_transform_0.get_party_instance(role='host', party_id=host).component_param(with_label=False)

    # define Intersection component
    intersection_0 = Intersection(name="intersection_0")

    # Model hyper-parameters come from the benchmark param file;
    # init_method and random_seed fall back to defaults when absent.
    lr_param = {
        "penalty": param["penalty"],
        "max_iter": param["max_iter"],
        "alpha": param["alpha"],
        "learning_rate": param["learning_rate"],
        "optimizer": param["optimizer"],
        "batch_size": param["batch_size"],
        "early_stop": "diff",
        "init_param": {
            "init_method": param.get("init_method", 'random_uniform'),
            "random_seed": param.get("random_seed", 103)
        }
    }
    print(f"lr_param: {lr_param}, data_set: {data_set}")
    # hetero_lr_1 reuses hetero_lr_0's trained model in test mode (see the
    # model=Model(...) wiring below).
    hetero_lr_0 = HeteroLR(name='hetero_lr_0', **lr_param)
    hetero_lr_1 = HeteroLR(name='hetero_lr_1')

    evaluation_0 = Evaluation(name='evaluation_0', eval_type="multi")

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(hetero_lr_0, data=Data(train_data=intersection_0.output.data))
    pipeline.add_component(hetero_lr_1, data=Data(test_data=intersection_0.output.data),
                           model=Model(hetero_lr_0.output.model))
    pipeline.add_component(evaluation_0, data=Data(data=hetero_lr_0.output.data))

    # compile pipeline once finished adding modules, this step will form conf and dsl files for running job
    pipeline.compile()

    # fit model
    pipeline.fit()

    # Compare train-mode and test-mode predictions with a distribution
    # metric, then fold it into the evaluation summary.
    result_summary = parse_summary_result(pipeline.get_component("evaluation_0").get_summary())
    lr_0_data = pipeline.get_component("hetero_lr_0").get_output_data().get("data")
    lr_1_data = pipeline.get_component("hetero_lr_1").get_output_data().get("data")
    lr_0_score_label = extract_data(lr_0_data, "predict_result", keep_id=True)
    lr_1_score_label = extract_data(lr_1_data, "predict_result", keep_id=True)
    metric_lr = {
        "score_diversity_ratio": classification_metric.Distribution.compute(lr_0_score_label, lr_1_score_label)}
    result_summary["distribution_metrics"] = {"hetero_lr": metric_lr}

    # Train and "test" use the same tables here (self-consistency check).
    data_summary = {"train": {"guest": guest_train_data["name"], "host": host_train_data["name"]},
                    "test": {"guest": guest_train_data["name"], "host": host_train_data["name"]}
                    }
    return data_summary, result_summary
Code example #5
0
def main(config="../../config.yaml", namespace=""):
    """Feature-selection demo: hetero Pearson correlation and hetero
    binning feed a VIF- and correlation-based feature-selection component.

    Parameters
    ----------
    config : str or config object
        Path to a job-config YAML (loaded via ``load_job_config``) or an
        already-parsed config object.
    namespace : str
        Suffix appended to the data namespace, to isolate test data sets.
    """
    # Resolve the job configuration and the two participating parties.
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]

    guest_train_data = {"name": "breast_hetero_guest",
                        "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "breast_hetero_host",
                       "namespace": f"experiment{namespace}"}

    # Guest initiates the job; only guest and host participate.
    pipeline = PipeLine()
    pipeline.set_initiator(role='guest', party_id=guest)
    pipeline.set_roles(guest=guest, host=host)

    # Each party reads its own table.
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)

    # Only the guest side carries labels.
    data_transform_0 = DataTransform(name="data_transform_0")
    data_transform_0.get_party_instance(
        role='guest', party_id=guest).component_param(with_label=True)
    data_transform_0.get_party_instance(
        role='host', party_id=host).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")

    # Correlation over all columns; binning with default settings.
    hetero_pearson_0 = HeteroPearson(name='hetero_pearson_0',
                                     column_indexes=-1)
    hetero_binning_0 = HeteroFeatureBinning(name='hetero_binning_0')

    # Select over all columns, filtering by VIF and pairwise correlation.
    hetero_feature_selection_0 = HeteroFeatureSelection(
        name="hetero_feature_selection_0",
        select_col_indexes=-1,
        select_names=[],
        filter_methods=["vif_filter", "correlation_filter"],
        vif_param={"threshold": 5},
        correlation_param={"threshold": 0.5, "select_federated": False})

    # Wire components in execution order; selection consumes the isometric
    # models produced by the pearson and binning components.
    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0,
                           data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0,
                           data=Data(data=data_transform_0.output.data))
    pipeline.add_component(hetero_binning_0,
                           data=Data(data=intersection_0.output.data))
    pipeline.add_component(hetero_pearson_0,
                           data=Data(data=intersection_0.output.data))
    pipeline.add_component(
        hetero_feature_selection_0,
        data=Data(data=intersection_0.output.data),
        model=Model(isometric_model=[
            hetero_pearson_0.output.model, hetero_binning_0.output.model
        ]))

    # Compile (forms conf/dsl for the job) and run.
    pipeline.compile()
    pipeline.fit()
Code example #6
0
def main(config="../../config.yaml", namespace=""):
    """End-to-end hetero LR pipeline on tag-value host data: read ->
    DataIO (with missing-value fill) -> intersect -> stratified sample ->
    optimal binning + statistics -> feature selection -> scaling -> LR
    with a validation data set -> evaluation.

    NOTE(review): this example calls ``algorithm_param`` where the other
    examples in this file call ``component_param`` — presumably an older
    FATE pipeline API; verify against the installed FATE version.
    """
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]
    backend = config.backend
    work_mode = config.work_mode

    # Host tables are in sparse tag-value format (see the host DataIO
    # params below); train and test reuse the same uploaded tables.
    guest_train_data = {
        "name": "breast_hetero_guest",
        "namespace": "experiment"
    }
    guest_test_data = {
        "name": "breast_hetero_guest",
        "namespace": "experiment"
    }
    host_train_data = {
        "name": "breast_hetero_host_tag_value",
        "namespace": "experiment"
    }
    host_test_data = {
        "name": "breast_hetero_host_tag_value",
        "namespace": "experiment"
    }

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    reader_1 = Reader(name="reader_1")
    # configure Reader for guest
    reader_0.get_party_instance(
        role='guest', party_id=guest).algorithm_param(table=guest_train_data)
    reader_1.get_party_instance(
        role='guest', party_id=guest).algorithm_param(table=guest_test_data)
    # configure Reader for host
    reader_0.get_party_instance(
        role='host', party_id=host).algorithm_param(table=host_train_data)
    reader_1.get_party_instance(
        role='host', party_id=host).algorithm_param(table=host_test_data)

    # define DataIO components
    dataio_0 = DataIO(name="dataio_0")  # start component numbering at 0
    dataio_1 = DataIO(name="dataio_1")  # start component numbering at 1

    # Guest-side parsing: dense labelled data with mean-imputation of
    # missing values; outlier replacement is configured but disabled
    # (outlier_replace=False).
    param = {
        "with_label": True,
        "label_name": "y",
        "label_type": "int",
        "output_format": "dense",
        "missing_fill": True,
        "missing_fill_method": "mean",
        "outlier_replace": False,
        "outlier_replace_method": "designated",
        "outlier_replace_value": 0.66,
        "outlier_impute": "-9999"
    }
    # get DataIO party instance of guest
    dataio_0_guest_party_instance = dataio_0.get_party_instance(role='guest',
                                                                party_id=guest)
    # configure DataIO for guest
    dataio_0_guest_party_instance.algorithm_param(**param)
    # configure the guest side of dataio_1 with the same parsing params
    dataio_1.get_party_instance(role='guest',
                                party_id=guest).algorithm_param(**param)

    # Host-side parsing: unlabelled sparse tag-value input
    # (e.g. "feat:val;feat:val") converted to dense output.
    param = {
        "input_format": "tag",
        "with_label": False,
        "tag_with_value": True,
        "delimitor": ";",
        "output_format": "dense"
    }
    dataio_0.get_party_instance(role='host',
                                party_id=host).algorithm_param(**param)
    dataio_1.get_party_instance(role='host',
                                party_id=host).algorithm_param(**param)

    # define Intersection components (raw ID matching, one per data set)
    intersection_0 = Intersection(name="intersection_0",
                                  intersect_method="raw")
    intersection_1 = Intersection(name="intersection_1",
                                  intersect_method="raw")

    # Stratified down-sampling per label: keep 50% of class 0, 80% of class 1.
    param = {
        'name': 'sample_0',
        "mode": "stratified",
        "method": "downsample",
        "fractions": [[0, 0.5], [1, 0.8]],
        "need_run": True
    }
    sample_0 = FederatedSample(**param)
    # IV-optimal binning over all columns, quantile-initialized buckets.
    param = {
        "name": 'hetero_feature_binning_0',
        "method": 'optimal',
        "optimal_binning_param": {
            "metric_method": "iv",
            "init_bucket_method": "quantile"
        },
        "bin_indexes": -1
    }
    hetero_feature_binning_0 = HeteroFeatureBinning(**param)
    statistic_0 = DataStatistics(name='statistic_0')
    # Selection filters: drop near-constant columns, then filter by IV
    # (top-k and threshold) and by statistics (CV high, skewness low).
    param = {
        "name": 'hetero_feature_selection_0',
        "filter_methods": ["unique_value", "iv_filter", "statistic_filter"],
        "unique_param": {
            "eps": 1e-6
        },
        "iv_param": {
            "metrics": ["iv", "iv"],
            "filter_type": ["top_k", "threshold"],
            "take_high": [True, True],
            "threshold": [10, 0.1]
        },
        "statistic_param": {
            "metrics": ["coefficient_of_variance", "skewness"],
            "filter_type": ["threshold", "threshold"],
            "take_high": [True, False],
            "threshold": [0.001, -0.01]
        },
        "select_col_indexes": -1
    }
    hetero_feature_selection_0 = HeteroFeatureSelection(**param)
    # The *_1 components replay the fitted *_0 models on the test data set.
    hetero_feature_selection_1 = HeteroFeatureSelection(
        name='hetero_feature_selection_1')
    param = {"name": "hetero_scale_0", "method": "standard_scale"}
    hetero_scale_0 = FeatureScale(**param)
    hetero_scale_1 = FeatureScale(name='hetero_scale_1')
    param = {
        "penalty": "L2",
        "optimizer": "nesterov_momentum_sgd",
        "tol": 1e-4,
        "alpha": 0.01,
        "max_iter": 5,
        "early_stop": "diff",
        "batch_size": -1,
        "learning_rate": 0.15,
        "init_param": {
            "init_method": "zeros"
        },
        "validation_freqs": None,
        "early_stopping_rounds": None
    }

    hetero_lr_0 = HeteroLR(name='hetero_lr_0', **param)
    evaluation_0 = Evaluation(name='evaluation_0')
    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(dataio_1,
                           data=Data(data=reader_1.output.data),
                           model=Model(dataio_0.output.model))

    # set data input sources of intersection components
    pipeline.add_component(intersection_0,
                           data=Data(data=dataio_0.output.data))
    pipeline.add_component(sample_0,
                           data=Data(data=intersection_0.output.data))
    pipeline.add_component(intersection_1,
                           data=Data(data=dataio_1.output.data))

    # feature engineering runs on the down-sampled training data
    pipeline.add_component(hetero_feature_binning_0,
                           data=Data(data=sample_0.output.data))

    pipeline.add_component(statistic_0, data=Data(data=sample_0.output.data))

    # selection consumes the isometric models from binning and statistics
    pipeline.add_component(
        hetero_feature_selection_0,
        data=Data(data=sample_0.output.data),
        model=Model(isometric_model=[
            hetero_feature_binning_0.output.model, statistic_0.output.model
        ]))
    pipeline.add_component(hetero_feature_selection_1,
                           data=Data(data=intersection_1.output.data),
                           model=Model(
                               hetero_feature_selection_0.output.model))

    pipeline.add_component(
        hetero_scale_0, data=Data(data=hetero_feature_selection_0.output.data))
    pipeline.add_component(
        hetero_scale_1,
        data=Data(data=hetero_feature_selection_1.output.data),
        model=Model(hetero_scale_0.output.model))

    # set train & validate data of hetero_lr_0 component
    pipeline.add_component(hetero_lr_0,
                           data=Data(train_data=hetero_scale_0.output.data,
                                     validate_data=hetero_scale_1.output.data))

    pipeline.add_component(evaluation_0,
                           data=Data(data=[hetero_lr_0.output.data]))
    # compile pipeline once finished adding modules, this step will form conf and dsl files for running job
    pipeline.compile()

    # fit model
    pipeline.fit(backend=backend, work_mode=work_mode)
    # query component summary
    print(pipeline.get_component("hetero_lr_0").get_summary())
Code example #7
0
def main(config="../../config.yaml", namespace=""):
    """Hetero linear regression with early stopping against a validation
    data set (motor data; guest holds the float label "motor_speed").

    Parameters
    ----------
    config : str or config object
        Path to a job-config YAML (loaded via ``load_job_config``) or an
        already-parsed config object.
    namespace : str
        Suffix appended to the data namespace, to isolate test data sets.
    """
    # Resolve the job configuration and the participating parties.
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    # Train and validation splits read the same uploaded tables.
    guest_tables = [{"name": "motor_hetero_guest", "namespace": f"experiment{namespace}"},
                    {"name": "motor_hetero_guest", "namespace": f"experiment{namespace}"}]
    host_tables = [{"name": "motor_hetero_host", "namespace": f"experiment{namespace}"},
                   {"name": "motor_hetero_host", "namespace": f"experiment{namespace}"}]

    # Guest initiates; guest, host and arbiter participate.
    pipeline = PipeLine()
    pipeline.set_initiator(role='guest', party_id=guest)
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # One reader per split; each party reads its own table.
    reader_0 = Reader(name="reader_0")
    reader_1 = Reader(name="reader_1")
    for idx, reader in enumerate((reader_0, reader_1)):
        reader.get_party_instance(
            role='guest', party_id=guest).component_param(table=guest_tables[idx])
        reader.get_party_instance(
            role='host', party_id=host).component_param(table=host_tables[idx])

    # Identical parsing for both splits: guest carries the float label,
    # host data is unlabelled.
    data_transform_0 = DataTransform(name="data_transform_0")
    data_transform_1 = DataTransform(name="data_transform_1")
    for transform in (data_transform_0, data_transform_1):
        transform.get_party_instance(
            role='guest', party_id=guest).component_param(
            with_label=True, label_name="motor_speed",
            label_type="float", output_format="dense")
        transform.get_party_instance(
            role='host', party_id=host).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")
    intersection_1 = Intersection(name="intersection_1")

    # LinR with early stopping on the validation split; fast encrypted-mode
    # calculator and per-epoch callback metrics.
    hetero_linr_0 = HeteroLinR(
        name="hetero_linr_0", penalty="L2", optimizer="sgd", tol=0.001,
        alpha=0.01, max_iter=20, early_stop="weight_diff", batch_size=-1,
        learning_rate=0.15, decay=0.0, decay_sqrt=False,
        init_param={"init_method": "zeros"},
        encrypted_mode_calculator_param={"mode": "fast"},
        callback_param={"callbacks": ["EarlyStopping", "PerformanceEvaluate"],
                        "validation_freqs": 1,
                        "early_stopping_rounds": 5,
                        "metrics": [
                            "mean_absolute_error",
                            "root_mean_squared_error"
                        ],
                        "use_first_metric_only": False,
                        "save_freq": 1
                        }
    )

    # Wire components in execution order; the second transform replays the
    # first one's fitted model on the validation split.
    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(data_transform_1, data=Data(data=reader_1.output.data),
                           model=Model(data_transform_0.output.model))
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(intersection_1, data=Data(data=data_transform_1.output.data))
    pipeline.add_component(hetero_linr_0, data=Data(train_data=intersection_0.output.data,
                                                    validate_data=intersection_1.output.data))

    # Compile (forms conf/dsl for the job) and run.
    pipeline.compile()
    pipeline.fit()
def main(config="../../config.yaml", namespace=""):
    """Train a hetero feature-binning + IV-filter feature-selection pipeline
    on the vehicle_scale datasets, then reuse the fitted components to
    run prediction on the validation readers.

    Args:
        config: path to a job-config YAML, or an already-loaded config object.
        namespace: suffix appended to the data namespace ("experiment<namespace>").
    """

    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)

    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]

    # train and validation tables both point at the vehicle_scale datasets
    guest_train_data = {"name": "vehicle_scale_hetero_guest", "namespace": f"experiment{namespace}"}
    guest_validate_data = {"name": "vehicle_scale_hetero_guest", "namespace": f"experiment{namespace}"}

    host_train_data = {"name": "vehicle_scale_hetero_host", "namespace": f"experiment{namespace}"}
    host_validate_data = {"name": "vehicle_scale_hetero_host", "namespace": f"experiment{namespace}"}

    pipeline = PipeLine().set_initiator(role='guest', party_id=guest).set_roles(guest=guest, host=host)

    data_transform_0, data_transform_1 = DataTransform(name="data_transform_0"), DataTransform(name='data_transform_1')
    reader_0, reader_1 = Reader(name="reader_0"), Reader(name='reader_1')

    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)
    data_transform_0.get_party_instance(
        role='guest', party_id=guest).component_param(
        with_label=True, output_format="dense")
    data_transform_0.get_party_instance(
        role='host', party_id=host).component_param(
        with_label=False, output_format="dense")

    reader_1.get_party_instance(role='guest', party_id=guest).component_param(table=guest_validate_data)
    reader_1.get_party_instance(role='host', party_id=host).component_param(table=host_validate_data)
    data_transform_1.get_party_instance(
        role='guest', party_id=guest).component_param(
        with_label=True, output_format="dense")
    # NOTE(review): host side here uses with_label=True while data_transform_0's
    # host uses with_label=False — confirm the host validation table really
    # carries a label; this looks inconsistent with the train-side config.
    data_transform_1.get_party_instance(
        role='host', party_id=host).component_param(
        with_label=True, output_format="dense")

    intersection_0 = Intersection(name="intersection_0")
    intersection_1 = Intersection(name="intersection_1")

    # Quantile binning config; columns 0-2 are treated as categorical and
    # results are exported as bin indices ("bin_num" transform).
    param = {
        "method": "quantile",
        "optimal_binning_param": {
            "metric_method": "gini",
            "min_bin_pct": 0.05,
            "max_bin_pct": 0.8,
            "init_bucket_method": "quantile",
            "init_bin_nums": 100,
            "mixture": True
        },
        "compress_thres": 10000,
        "head_size": 10000,
        "error": 0.001,
        "bin_num": 10,
        "bin_indexes": -1,
        "bin_names": None,
        "category_indexes": [0, 1, 2],
        "category_names": None,
        "adjustment_factor": 0.5,
        "local_only": False,
        "transform_param": {
            "transform_cols": -1,
            "transform_names": None,
            "transform_type": "bin_num"
        }
    }

    hetero_feature_binning_0 = HeteroFeatureBinning(name="hetero_feature_binning_0", **param)
    hetero_feature_binning_1 = HeteroFeatureBinning(name='hetero_feature_binning_1')

    # IV filter applied three ways: absolute threshold, top-k, and top-percentile,
    # with per-filter multi-class IV merge strategies.
    selection_param = {
        "name": "hetero_feature_selection_0",
        "select_col_indexes": -1,
        "select_names": [],
        "filter_methods": ["iv_filter"],
        "iv_param": {
            "metrics": ["iv", "iv", "iv"],
            "filter_type": ["threshold", "top_k", "top_percentile"],
            "threshold": [2, 10, 0.9],
            "mul_class_merge_type": ["max", "min", "average"]
        }}
    hetero_feature_selection_0 = HeteroFeatureSelection(**selection_param)
    hetero_feature_selection_1 = HeteroFeatureSelection(name="hetero_feature_selection_1")

    # wire the DAG: readers -> transforms -> intersections -> binning -> selection;
    # *_1 components reuse the *_0 fitted models on the validation branch
    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(reader_1)
    pipeline.add_component(
        data_transform_1, data=Data(
            data=reader_1.output.data), model=Model(
            data_transform_0.output.model))
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(intersection_1, data=Data(data=data_transform_1.output.data))
    pipeline.add_component(hetero_feature_binning_0, data=Data(data=intersection_0.output.data))
    pipeline.add_component(hetero_feature_binning_1, data=Data(data=intersection_1.output.data),
                           model=Model(hetero_feature_binning_0.output.model))
    # selection consumes binning output as an isometric (metric-provider) model
    pipeline.add_component(hetero_feature_selection_0, data=Data(data=hetero_feature_binning_0.output.data),
                           model=Model(isometric_model=hetero_feature_binning_0.output.model))
    pipeline.add_component(hetero_feature_selection_1, data=Data(data=hetero_feature_binning_1.output.data),
                           model=Model(hetero_feature_selection_0.output.model))
    pipeline.compile()
    pipeline.fit()

    # predict
    # deploy required components
    pipeline.deploy_component([data_transform_0, intersection_0, hetero_feature_selection_0])

    predict_pipeline = PipeLine()
    # add data reader onto predict pipeline
    predict_pipeline.add_component(reader_1)
    # add selected components from train pipeline onto predict pipeline
    # specify data source
    predict_pipeline.add_component(
        pipeline, data=Data(
            predict_input={
                pipeline.data_transform_0.input.data: reader_1.output.data}))
    # run predict model
    predict_pipeline.predict()
Code example #9
0
def main(config="../../config.yaml", namespace=""):
    """Train a class-weighted HeteroLR on breast_hetero data and print the
    evaluation summary.

    Guest-side samples are reweighted (label "1" counts double) via the
    SampleWeight component before LR training.

    Args:
        config: path to a job-config YAML, or an already-loaded config object.
        namespace: suffix appended to the data namespace.
    """
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]
    backend = config.backend
    work_mode = config.work_mode

    guest_train_data = {
        "name": "breast_hetero_guest",
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": "breast_hetero_host",
        "namespace": f"experiment{namespace}"
    }

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    # configure Reader for guest
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    # configure Reader for host
    reader_0.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)

    # define DataIO components; only the guest side carries labels
    dataio_0 = DataIO(name="dataio_0", with_label=True,
                      output_format="dense")  # start component numbering at 0
    dataio_0.get_party_instance(
        role="host", party_id=host).component_param(with_label=False)
    intersect_0 = Intersection(name='intersect_0')

    scale_0 = FeatureScale(name='scale_0')
    # weight label "1" twice as heavily as label "0"; host has no labels,
    # so the component is skipped there (need_run=False)
    sample_weight_0 = SampleWeight(name="sample_weight_0",
                                   class_weight={
                                       "0": 1,
                                       "1": 2
                                   })
    sample_weight_0.get_party_instance(
        role="host", party_id=host).component_param(need_run=False)

    # HeteroLR hyper-parameters; cv_param present but disabled (need_cv=False)
    param = {
        "penalty": "L2",
        "optimizer": "rmsprop",
        "tol": 1e-05,
        "alpha": 0.01,
        "max_iter": 3,
        "early_stop": "diff",
        "batch_size": 320,
        "learning_rate": 0.15,
        "decay": 1.0,
        "decay_sqrt": True,
        "init_param": {
            "init_method": "zeros"
        },
        "cv_param": {
            "n_splits": 5,
            "shuffle": True,
            "random_seed": 33,
            "need_cv": False
        }
    }
    hetero_lr_0 = HeteroLR(name='hetero_lr_0', **param)
    evaluation_0 = Evaluation(name='evaluation_0')
    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersect_0, data=Data(data=dataio_0.output.data))
    # set data input sources of intersection components
    pipeline.add_component(scale_0, data=Data(data=intersect_0.output.data))
    pipeline.add_component(sample_weight_0,
                           data=Data(data=scale_0.output.data))

    pipeline.add_component(hetero_lr_0,
                           data=Data(train_data=sample_weight_0.output.data))
    pipeline.add_component(evaluation_0,
                           data=Data(data=hetero_lr_0.output.data))

    # compile pipeline once finished adding modules, this step will form conf and dsl files for running job
    pipeline.compile()

    # fit model
    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)
    # query component summary
    print(
        json.dumps(pipeline.get_component("evaluation_0").get_summary(),
                   indent=4,
                   ensure_ascii=False))
Code example #10
0
File: fate-sbt.py  Project: yubo1993/FATE
def main(config="../../config.yaml",
         param='./xgb_config_binary.yaml',
         namespace=""):
    """Benchmark HomoSecureBoost: train on one table, re-score validation
    data with the trained model, and compute comparison metrics.

    Args:
        config: path to a job-config YAML, or an already-loaded config object.
        param: path to a benchmark-parameter YAML (data names, tree settings,
            eval_type, ...), or an already-loaded mapping.
        namespace: suffix appended to the data namespace.

    Returns:
        Tuple ``(data_summary, metric_summary)`` — datasets used per role and
        the collected evaluation/distribution metrics.
    """
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)

    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    guest_train_data = {
        "name": param['data_guest_train'],
        "namespace": f"experiment{namespace}"
    }
    guest_validate_data = {
        "name": param['data_guest_val'],
        "namespace": f"experiment{namespace}"
    }

    host_train_data = {
        "name": param['data_host_train'],
        "namespace": f"experiment{namespace}"
    }
    host_validate_data = {
        "name": param['data_host_val'],
        "namespace": f"experiment{namespace}"
    }

    pipeline = PipeLine().set_initiator(
        role='guest', party_id=guest).set_roles(guest=guest,
                                                host=host,
                                                arbiter=arbiter)

    dataio_0, dataio_1 = DataIO(name="dataio_0"), DataIO(name='dataio_1')
    reader_0, reader_1 = Reader(name="reader_0"), Reader(name='reader_1')

    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)
    # homo setting: both guest and host tables carry labels
    dataio_0.get_party_instance(role='guest', party_id=guest).component_param(
        with_label=True, output_format="dense")
    dataio_0.get_party_instance(role='host', party_id=host).component_param(
        with_label=True, output_format="dense")

    reader_1.get_party_instance(
        role='guest',
        party_id=guest).component_param(table=guest_validate_data)
    reader_1.get_party_instance(
        role='host', party_id=host).component_param(table=host_validate_data)
    dataio_1.get_party_instance(role='guest', party_id=guest).component_param(
        with_label=True, output_format="dense")
    dataio_1.get_party_instance(role='host', party_id=host).component_param(
        with_label=True, output_format="dense")

    homo_secureboost_0 = HomoSecureBoost(
        name="homo_secureboost_0",
        num_trees=param['tree_num'],
        task_type=param['task_type'],
        objective_param={"objective": param['loss_func']},
        tree_param={"max_depth": param['tree_depth']},
        validation_freqs=1,
        subsample_feature_rate=1,
        learning_rate=param['learning_rate'],
        bin_num=50)
    # second instance only re-scores validation data using the trained model
    homo_secureboost_1 = HomoSecureBoost(name="homo_secureboost_1")

    evaluation_0 = Evaluation(name='evaluation_0',
                              eval_type=param['eval_type'])

    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(reader_1)
    pipeline.add_component(dataio_1,
                           data=Data(data=reader_1.output.data),
                           model=Model(dataio_0.output.model))
    pipeline.add_component(homo_secureboost_0,
                           data=Data(train_data=dataio_0.output.data,
                                     validate_data=dataio_1.output.data))
    pipeline.add_component(homo_secureboost_1,
                           data=Data(test_data=dataio_1.output.data),
                           model=Model(homo_secureboost_0.output.model))
    pipeline.add_component(evaluation_0,
                           data=Data(homo_secureboost_0.output.data))

    pipeline.compile()
    pipeline.fit()

    # pull predictions from both runs for train-vs-validate comparison metrics
    sbt_0_data = pipeline.get_component(
        "homo_secureboost_0").get_output_data().get("data")
    sbt_1_data = pipeline.get_component(
        "homo_secureboost_1").get_output_data().get("data")
    sbt_0_score = extract_data(sbt_0_data, "predict_result")
    sbt_0_label = extract_data(sbt_0_data, "label")
    sbt_1_score = extract_data(sbt_1_data, "predict_result")
    sbt_1_label = extract_data(sbt_1_data, "label")
    sbt_0_score_label = extract_data(sbt_0_data,
                                     "predict_result",
                                     keep_id=True)
    sbt_1_score_label = extract_data(sbt_1_data,
                                     "predict_result",
                                     keep_id=True)
    metric_summary = parse_summary_result(
        pipeline.get_component("evaluation_0").get_summary())
    # task-type-specific extra metrics comparing the two scoring runs
    if param['eval_type'] == "regression":
        desc_sbt_0 = regression_metric.Describe().compute(sbt_0_score)
        desc_sbt_1 = regression_metric.Describe().compute(sbt_1_score)
        metric_summary["script_metrics"] = {
            "sbt_train": desc_sbt_0,
            "sbt_validate": desc_sbt_1
        }
    elif param['eval_type'] == "binary":
        metric_sbt = {
            "score_diversity_ratio":
            classification_metric.Distribution.compute(sbt_0_score_label,
                                                       sbt_1_score_label),
            "ks_2samp":
            classification_metric.KSTest.compute(sbt_0_score, sbt_1_score),
            "mAP_D_value":
            classification_metric.AveragePrecisionScore().compute(
                sbt_0_score, sbt_1_score, sbt_0_label, sbt_1_label)
        }
        metric_summary["distribution_metrics"] = {"homo_sbt": metric_sbt}
    elif param['eval_type'] == "multi":
        metric_sbt = {
            "score_diversity_ratio":
            classification_metric.Distribution.compute(sbt_0_score_label,
                                                       sbt_1_score_label)
        }
        metric_summary["distribution_metrics"] = {"homo_sbt": metric_sbt}

    data_summary = {
        "train": {
            "guest": guest_train_data["name"],
            "host": host_train_data["name"]
        },
        "test": {
            "guest": guest_validate_data["name"],
            "host": host_validate_data["name"]
        }
    }

    return data_summary, metric_summary
Code example #11
0
File: pipeline-multi-model.py  Project: zark7777/FATE
def main(config="../../config.yaml", namespace=""):
    """Run a shared preprocessing chain (intersect, upsample, scale, bin,
    select, one-hot) feeding four models: HeteroLR (plain and CV-only) and
    HeteroSecureBoost (CV-only and plain), then print the LR evaluation.

    Args:
        config: path to a job-config YAML, or an already-loaded config object.
        namespace: suffix appended to the data namespace.
    """
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]
    backend = config.backend
    work_mode = config.work_mode

    guest_train_data = {
        "name": "breast_hetero_guest",
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": "breast_hetero_host",
        "namespace": f"experiment{namespace}"
    }

    pipeline = PipeLine().set_initiator(
        role='guest', party_id=guest).set_roles(guest=guest,
                                                host=host,
                                                arbiter=arbiter)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)

    # both sides fill missing values and replace outliers; only guest has labels
    dataio_0 = DataIO(name="dataio_0")
    dataio_0.get_party_instance(role='guest', party_id=guest).component_param(
        with_label=True, missing_fill=True, outlier_replace=True)
    dataio_0.get_party_instance(role='host', party_id=host).component_param(
        with_label=False, missing_fill=True, outlier_replace=True)

    intersection_0 = Intersection(name="intersection_0")
    # stratified upsampling: class 0 x1.5, class 1 x2.0
    federated_sample_0 = FederatedSample(name="federated_sample_0",
                                         mode="stratified",
                                         method="upsample",
                                         fractions=[[0, 1.5], [1, 2.0]])
    feature_scale_0 = FeatureScale(name="feature_scale_0")
    hetero_feature_binning_0 = HeteroFeatureBinning(
        name="hetero_feature_binning_0")
    hetero_feature_selection_0 = HeteroFeatureSelection(
        name="hetero_feature_selection_0")
    one_hot_0 = OneHotEncoder(name="one_hot_0")
    hetero_lr_0 = HeteroLR(name="hetero_lr_0",
                           penalty="L2",
                           optimizer="rmsprop",
                           tol=1e-5,
                           init_param={"init_method": "random_uniform"},
                           alpha=0.01,
                           max_iter=10,
                           early_stop="diff",
                           batch_size=320,
                           learning_rate=0.15)
    # same LR settings but run as 5-fold cross-validation only (need_cv=True)
    hetero_lr_1 = HeteroLR(name="hetero_lr_1",
                           penalty="L2",
                           optimizer="rmsprop",
                           tol=1e-5,
                           init_param={"init_method": "random_uniform"},
                           alpha=0.01,
                           max_iter=10,
                           early_stop="diff",
                           batch_size=320,
                           learning_rate=0.15,
                           cv_param={
                               "n_splits": 5,
                               "shuffle": True,
                               "random_seed": 103,
                               "need_cv": True
                           })

    # SecureBoost in CV mode vs plain training
    hetero_secureboost_0 = HeteroSecureBoost(name="hetero_secureboost_0",
                                             num_trees=5,
                                             cv_param={
                                                 "shuffle": False,
                                                 "need_cv": True
                                             })
    hetero_secureboost_1 = HeteroSecureBoost(name="hetero_secureboost_1",
                                             num_trees=5)
    evaluation_0 = Evaluation(name="evaluation_0")
    evaluation_1 = Evaluation(name="evaluation_1")

    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0,
                           data=Data(data=dataio_0.output.data))
    pipeline.add_component(federated_sample_0,
                           data=Data(data=intersection_0.output.data))
    pipeline.add_component(feature_scale_0,
                           data=Data(data=federated_sample_0.output.data))
    pipeline.add_component(hetero_feature_binning_0,
                           data=Data(data=feature_scale_0.output.data))
    pipeline.add_component(
        hetero_feature_selection_0,
        data=Data(data=hetero_feature_binning_0.output.data))
    pipeline.add_component(
        one_hot_0, data=Data(data=hetero_feature_selection_0.output.data))
    # all four models train on the same one-hot encoded features
    pipeline.add_component(hetero_lr_0,
                           data=Data(train_data=one_hot_0.output.data))
    pipeline.add_component(hetero_lr_1,
                           data=Data(train_data=one_hot_0.output.data))
    pipeline.add_component(hetero_secureboost_0,
                           data=Data(train_data=one_hot_0.output.data))
    pipeline.add_component(hetero_secureboost_1,
                           data=Data(train_data=one_hot_0.output.data))
    pipeline.add_component(evaluation_0,
                           data=Data(data=hetero_lr_0.output.data))
    pipeline.add_component(evaluation_1,
                           data=Data(data=hetero_secureboost_1.output.data))
    pipeline.compile()

    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)

    print(pipeline.get_component("evaluation_0").get_summary())
Code example #12
0
def main(config="../../config.yaml", namespace=""):
    """Build and fit a large multi-model hetero pipeline over breast_hetero.

    A shared preprocessing graph (data transform -> intersection -> union /
    federated sample -> scale -> binning -> selection -> one-hot) feeds many
    model families: HeteroLR (+CV), HeteroSSHELR, LocalBaseline,
    HeteroSecureBoost (+CV), HeteroLinR, HeteroSSHELinR, HeteroPoisson and
    HeteroSSHEPoisson. Each ``*_1`` component re-scores test data with the
    ``*_0`` fitted model. Three Evaluation components summarize the binary
    classifiers, the linear regressors and the Poisson regressors.

    Args:
        config: path to a job-config YAML, or an already-loaded config object.
        namespace: suffix appended to the data namespace.
    """
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    guest_train_data = {
        "name": "breast_hetero_guest",
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": "breast_hetero_host",
        "namespace": f"experiment{namespace}"
    }

    pipeline = PipeLine().set_initiator(
        role='guest', party_id=guest).set_roles(guest=guest,
                                                host=host,
                                                arbiter=arbiter)

    # three readers over the same tables feed three parallel transform branches
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)

    reader_1 = Reader(name="reader_1")
    reader_1.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_1.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)

    reader_2 = Reader(name="reader_2")
    reader_2.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_2.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)

    data_transform_0 = DataTransform(name="data_transform_0")
    data_transform_0.get_party_instance(
        role='guest', party_id=guest).component_param(with_label=True,
                                                      missing_fill=True,
                                                      outlier_replace=True)
    data_transform_0.get_party_instance(
        role='host', party_id=host).component_param(with_label=False,
                                                    missing_fill=True,
                                                    outlier_replace=True)
    # transforms 1 and 2 reuse data_transform_0's fitted model
    data_transform_1 = DataTransform(name="data_transform_1")
    data_transform_2 = DataTransform(name="data_transform_2")

    intersection_0 = Intersection(name="intersection_0")
    intersection_1 = Intersection(name="intersection_1")
    intersection_2 = Intersection(name="intersection_2")

    union_0 = Union(name="union_0")

    # stratified downsample keeping all samples (fractions 1.0 per class)
    federated_sample_0 = FederatedSample(name="federated_sample_0",
                                         mode="stratified",
                                         method="downsample",
                                         fractions=[[0, 1.0], [1, 1.0]])

    feature_scale_0 = FeatureScale(name="feature_scale_0")
    feature_scale_1 = FeatureScale(name="feature_scale_1")

    hetero_feature_binning_0 = HeteroFeatureBinning(
        name="hetero_feature_binning_0")
    hetero_feature_binning_1 = HeteroFeatureBinning(
        name="hetero_feature_binning_1")

    hetero_feature_selection_0 = HeteroFeatureSelection(
        name="hetero_feature_selection_0")
    hetero_feature_selection_1 = HeteroFeatureSelection(
        name="hetero_feature_selection_1")

    one_hot_0 = OneHotEncoder(name="one_hot_0")
    one_hot_1 = OneHotEncoder(name="one_hot_1")

    hetero_lr_0 = HeteroLR(name="hetero_lr_0",
                           penalty="L2",
                           optimizer="rmsprop",
                           tol=1e-5,
                           init_param={"init_method": "random_uniform"},
                           alpha=0.01,
                           max_iter=3,
                           early_stop="diff",
                           batch_size=320,
                           learning_rate=0.15)
    hetero_lr_1 = HeteroLR(name="hetero_lr_1")
    # same LR settings but run as 5-fold cross-validation only
    hetero_lr_2 = HeteroLR(name="hetero_lr_2",
                           penalty="L2",
                           optimizer="rmsprop",
                           tol=1e-5,
                           init_param={"init_method": "random_uniform"},
                           alpha=0.01,
                           max_iter=3,
                           early_stop="diff",
                           batch_size=320,
                           learning_rate=0.15,
                           cv_param={
                               "n_splits": 5,
                               "shuffle": True,
                               "random_seed": 103,
                               "need_cv": True
                           })

    hetero_sshe_lr_0 = HeteroSSHELR(
        name="hetero_sshe_lr_0",
        reveal_every_iter=True,
        reveal_strategy="respectively",
        penalty="L2",
        optimizer="rmsprop",
        tol=1e-5,
        batch_size=320,
        learning_rate=0.15,
        init_param={"init_method": "random_uniform"},
        alpha=0.01,
        max_iter=3)
    hetero_sshe_lr_1 = HeteroSSHELR(name="hetero_sshe_lr_1")

    # plaintext sklearn baseline on the guest side only
    local_baseline_0 = LocalBaseline(name="local_baseline_0",
                                     model_name="LogisticRegression",
                                     model_opts={
                                         "penalty": "l2",
                                         "tol": 0.0001,
                                         "C": 1.0,
                                         "fit_intercept": True,
                                         "solver": "lbfgs",
                                         "max_iter": 5,
                                         "multi_class": "ovr"
                                     })
    local_baseline_0.get_party_instance(
        role='guest', party_id=guest).component_param(need_run=True)
    local_baseline_0.get_party_instance(
        role='host', party_id=host).component_param(need_run=False)
    local_baseline_1 = LocalBaseline(name="local_baseline_1")

    hetero_secureboost_0 = HeteroSecureBoost(name="hetero_secureboost_0",
                                             num_trees=3)
    hetero_secureboost_1 = HeteroSecureBoost(name="hetero_secureboost_1")
    hetero_secureboost_2 = HeteroSecureBoost(name="hetero_secureboost_2",
                                             num_trees=3,
                                             cv_param={
                                                 "shuffle": False,
                                                 "need_cv": True
                                             })

    hetero_linr_0 = HeteroLinR(name="hetero_linr_0",
                               penalty="L2",
                               optimizer="sgd",
                               tol=0.001,
                               alpha=0.01,
                               max_iter=3,
                               early_stop="weight_diff",
                               batch_size=-1,
                               learning_rate=0.15,
                               decay=0.0,
                               decay_sqrt=False,
                               init_param={"init_method": "zeros"},
                               floating_point_precision=23)
    hetero_linr_1 = HeteroLinR(name="hetero_linr_1")

    hetero_sshe_linr_0 = HeteroSSHELinR(name="hetero_sshe_linr_0",
                                        max_iter=5,
                                        early_stop="weight_diff",
                                        batch_size=-1)
    hetero_sshe_linr_1 = HeteroSSHELinR(name="hetero_sshe_linr_1")

    hetero_poisson_0 = HeteroPoisson(name="hetero_poisson_0",
                                     early_stop="weight_diff",
                                     max_iter=10,
                                     alpha=100.0,
                                     batch_size=-1,
                                     learning_rate=0.01,
                                     optimizer="rmsprop",
                                     exposure_colname="exposure",
                                     decay_sqrt=False,
                                     tol=0.001,
                                     init_param={"init_method": "zeros"},
                                     penalty="L2")
    hetero_poisson_1 = HeteroPoisson(name="hetero_poisson_1")

    hetero_sshe_poisson_0 = HeteroSSHEPoisson(name="hetero_sshe_poisson_0",
                                              max_iter=5)
    hetero_sshe_poisson_1 = HeteroSSHEPoisson(name="hetero_sshe_poisson_1")

    # evaluation_0 covers binary classifiers (default eval_type);
    # evaluation_1/evaluation_2 cover regression outputs, so they must use
    # eval_type="regression" rather than the binary default
    evaluation_0 = Evaluation(name="evaluation_0")
    evaluation_1 = Evaluation(name="evaluation_1", eval_type="regression")
    evaluation_2 = Evaluation(name="evaluation_2", eval_type="regression")

    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(reader_2)

    pipeline.add_component(data_transform_0,
                           data=Data(data=reader_0.output.data))
    pipeline.add_component(data_transform_1,
                           data=Data(data=reader_1.output.data),
                           model=Model(model=data_transform_0.output.model))
    pipeline.add_component(data_transform_2,
                           data=Data(data=reader_2.output.data),
                           model=Model(model=data_transform_0.output.model))

    pipeline.add_component(intersection_0,
                           data=Data(data=data_transform_0.output.data))
    pipeline.add_component(intersection_1,
                           data=Data(data=data_transform_1.output.data))
    pipeline.add_component(intersection_2,
                           data=Data(data=data_transform_2.output.data))

    # branches 0 and 2 are unioned into the train stream; branch 1 is
    # sampled and used as the test stream
    pipeline.add_component(
        union_0,
        data=Data(
            data=[intersection_0.output.data, intersection_2.output.data]))

    pipeline.add_component(federated_sample_0,
                           data=Data(data=intersection_1.output.data))

    pipeline.add_component(feature_scale_0,
                           data=Data(data=union_0.output.data))
    pipeline.add_component(feature_scale_1,
                           data=Data(data=federated_sample_0.output.data),
                           model=Model(model=feature_scale_0.output.model))

    pipeline.add_component(hetero_feature_binning_0,
                           data=Data(data=feature_scale_0.output.data))
    pipeline.add_component(
        hetero_feature_binning_1,
        data=Data(data=feature_scale_1.output.data),
        model=Model(model=hetero_feature_binning_0.output.model))

    pipeline.add_component(
        hetero_feature_selection_0,
        data=Data(data=hetero_feature_binning_0.output.data))
    pipeline.add_component(
        hetero_feature_selection_1,
        data=Data(data=hetero_feature_binning_1.output.data),
        model=Model(model=hetero_feature_selection_0.output.model))

    pipeline.add_component(
        one_hot_0, data=Data(data=hetero_feature_selection_0.output.data))
    pipeline.add_component(
        one_hot_1,
        data=Data(data=hetero_feature_selection_1.output.data),
        model=Model(model=one_hot_0.output.model))

    pipeline.add_component(hetero_lr_0,
                           data=Data(train_data=one_hot_0.output.data))
    pipeline.add_component(hetero_lr_1,
                           data=Data(test_data=one_hot_1.output.data),
                           model=Model(model=hetero_lr_0.output.model))
    pipeline.add_component(hetero_lr_2,
                           data=Data(train_data=one_hot_0.output.data))

    pipeline.add_component(local_baseline_0,
                           data=Data(train_data=one_hot_0.output.data))
    pipeline.add_component(local_baseline_1,
                           data=Data(test_data=one_hot_1.output.data),
                           model=Model(model=local_baseline_0.output.model))

    pipeline.add_component(hetero_sshe_lr_0,
                           data=Data(train_data=one_hot_0.output.data))
    pipeline.add_component(hetero_sshe_lr_1,
                           data=Data(test_data=one_hot_1.output.data),
                           model=Model(model=hetero_sshe_lr_0.output.model))

    pipeline.add_component(hetero_secureboost_0,
                           data=Data(train_data=one_hot_0.output.data))
    pipeline.add_component(
        hetero_secureboost_1,
        data=Data(test_data=one_hot_1.output.data),
        model=Model(model=hetero_secureboost_0.output.model))
    pipeline.add_component(hetero_secureboost_2,
                           data=Data(train_data=one_hot_0.output.data))

    pipeline.add_component(hetero_linr_0,
                           data=Data(train_data=one_hot_0.output.data))
    pipeline.add_component(hetero_linr_1,
                           data=Data(test_data=one_hot_1.output.data),
                           model=Model(model=hetero_linr_0.output.model))

    pipeline.add_component(hetero_sshe_linr_0,
                           data=Data(train_data=one_hot_0.output.data))
    pipeline.add_component(hetero_sshe_linr_1,
                           data=Data(test_data=one_hot_1.output.data),
                           model=Model(model=hetero_sshe_linr_0.output.model))

    pipeline.add_component(hetero_poisson_0,
                           data=Data(train_data=one_hot_0.output.data))
    pipeline.add_component(hetero_poisson_1,
                           data=Data(test_data=one_hot_1.output.data),
                           model=Model(model=hetero_poisson_0.output.model))

    pipeline.add_component(
        evaluation_0,
        data=Data(data=[
            hetero_lr_0.output.data, hetero_lr_1.output.data,
            hetero_sshe_lr_0.output.data, hetero_sshe_lr_1.output.data,
            local_baseline_0.output.data, local_baseline_1.output.data
        ]))

    pipeline.add_component(hetero_sshe_poisson_0,
                           data=Data(train_data=one_hot_0.output.data))
    pipeline.add_component(
        hetero_sshe_poisson_1,
        data=Data(test_data=one_hot_1.output.data),
        model=Model(model=hetero_sshe_poisson_0.output.model))

    # FIX: the last entry previously repeated hetero_linr_1.output.data,
    # leaving the SSHE-LinR test run (hetero_sshe_linr_1) unevaluated
    pipeline.add_component(
        evaluation_1,
        data=Data(data=[
            hetero_linr_0.output.data, hetero_linr_1.output.data,
            hetero_sshe_linr_0.output.data, hetero_sshe_linr_1.output.data
        ]))
    pipeline.add_component(
        evaluation_2,
        data=Data(data=[
            hetero_poisson_0.output.data, hetero_poisson_1.output.data,
            hetero_sshe_poisson_0.output.data,
            hetero_sshe_poisson_1.output.data
        ]))

    pipeline.compile()

    pipeline.fit()

    print(pipeline.get_component("evaluation_0").get_summary())
    print(pipeline.get_component("evaluation_1").get_summary())
    print(pipeline.get_component("evaluation_2").get_summary())
Code example #13
0
def main(config="../../config.yaml", namespace=""):
    """Build and fit a guest-only pipeline that unions three tag-value
    tables and converts the combined output to dense format.

    :param config: path to a job config yaml, or an already-loaded config
    :param namespace: suffix appended to the data namespace
    """
    # Load the job configuration from disk when a path is given.
    if isinstance(config, str):
        config = load_job_config(config)
    guest = config.parties.guest[0]
    backend = config.backend
    work_mode = config.work_mode

    # The three input tables that the union component will concatenate.
    table_names = ["tag_value_1", "tag_value_2", "tag_value_3"]
    guest_train_data = [
        {"name": table, "namespace": f"experiment{namespace}"}
        for table in table_names
    ]

    pipeline = PipeLine().set_initiator(role='guest',
                                        party_id=guest).set_roles(guest=guest)

    # One Reader per input table, each configured for the guest party.
    readers = []
    for idx, table in enumerate(guest_train_data):
        reader = Reader(name=f"reader_{idx}")
        reader.get_party_instance(
            role='guest', party_id=guest).component_param(table=table)
        readers.append(reader)

    union_0 = Union(name="union_0",
                    allow_missing=False,
                    keep_duplicate=True,
                    need_run=True)

    # Parse tag-with-value input ("k,v" pairs) and emit a dense table.
    dataio_0 = DataIO(name="dataio_0",
                      input_format="tag",
                      with_label=False,
                      tag_with_value=True,
                      delimitor=",",
                      output_format="dense")

    for reader in readers:
        pipeline.add_component(reader)
    pipeline.add_component(
        union_0,
        data=Data(data=[reader.output.data for reader in readers]))
    pipeline.add_component(dataio_0, data=Data(data=union_0.output.data))

    pipeline.compile()

    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)
コード例 #14
0
def main(config="../../config.yaml", namespace=""):
    """Run a hetero pipeline that intersects guest/host data and then
    computes column statistics over the intersection output.

    :param config: path to a job config yaml, or an already-loaded config
    :param namespace: suffix appended to the data namespace
    """
    if isinstance(config, str):
        config = load_job_config(config)
    backend = config.backend
    work_mode = config.work_mode
    parties = config.parties
    guest = parties.guest[0]
    hosts = parties.host[0]

    guest_train_data = {
        "name": "breast_hetero_guest",
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": "breast_hetero_host",
        "namespace": f"experiment{namespace}"
    }
    # guest_train_data = {"name": "default_credit_hetero_guest", "namespace": f"experiment{namespace}"}
    # host_train_data = {"name": "default_credit_hetero_host", "namespace": f"experiment{namespace}"}

    # Guest initiates the job; a single host participates.
    pipeline = PipeLine().set_initiator(
        role='guest', party_id=guest).set_roles(guest=guest, host=hosts)

    # Reader: each party reads its own table.
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(
        role='host', party_id=hosts).component_param(table=host_train_data)

    # DataIO: the guest side carries the label, the host side does not.
    dataio_0 = DataIO(name="dataio_0", output_format='dense')
    dataio_0.get_party_instance(
        role='guest', party_id=guest).component_param(with_label=True)
    dataio_0.get_party_instance(
        role='host', party_id=hosts).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")

    # Statistics requested over every column of the intersected data.
    statistic_0 = DataStatistics(
        name="statistic_0",
        statistics=["95%", "coefficient_of_variance", "stddev"],
        column_indexes=-1,
        column_names=[])

    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0,
                           data=Data(data=dataio_0.output.data))
    pipeline.add_component(statistic_0,
                           data=Data(data=intersection_0.output.data))

    pipeline.compile()

    # Fit the job, then pretty-print the statistics component summary.
    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)
    prettify(pipeline.get_component("statistic_0").get_summary())
コード例 #15
0
ファイル: pipeline-scorecard.py プロジェクト: zpskt/FATE
def main(config="../../config.yaml", namespace=""):
    """Train a hetero LR model on the default-credit dataset and convert
    its prediction scores into credit scores via the Scorecard component.

    :param config: path to a job config yaml, or an already-loaded config
    :param namespace: suffix appended to the data namespace
    """
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]
    backend = config.backend
    work_mode = config.work_mode

    guest_train_data = {
        "name": "default_credit_hetero_guest",
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": "default_credit_hetero_host",
        "namespace": f"experiment{namespace}"
    }

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role="guest", party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    # configure Reader for guest
    reader_0.get_party_instance(
        role="guest", party_id=guest).component_param(table=guest_train_data)
    # configure Reader for host
    reader_0.get_party_instance(
        role="host", party_id=host).component_param(table=host_train_data)

    # define DataIO components
    dataio_0 = DataIO(name="dataio_0")  # start component numbering at 0

    # get DataIO party instance of guest
    dataio_0_guest_party_instance = dataio_0.get_party_instance(role="guest",
                                                                party_id=guest)
    # configure DataIO for guest
    dataio_0_guest_party_instance.component_param(with_label=True,
                                                  output_format="dense")
    # get and configure DataIO party instance of host
    dataio_0.get_party_instance(
        role="host", party_id=host).component_param(with_label=False)

    # define Intersection components
    intersection_0 = Intersection(name="intersection_0",
                                  intersect_method="rsa",
                                  sync_intersect_ids=True,
                                  only_output_key=False)

    # HeteroLR training hyper-parameters.
    # NOTE(review): sqn_param is passed even though optimizer is
    # nesterov_momentum_sgd — presumably ignored unless a SQN optimizer is
    # selected; confirm against the HeteroLR parameter docs.
    param = {
        "penalty": "L2",
        "optimizer": "nesterov_momentum_sgd",
        "tol": 0.0001,
        "alpha": 0.01,
        "max_iter": 5,
        "early_stop": "weight_diff",
        "batch_size": -1,
        "learning_rate": 0.15,
        "init_param": {
            "init_method": "random_uniform"
        },
        "sqn_param": {
            "update_interval_L": 3,
            "memory_M": 5,
            "sample_size": 5000,
            "random_seed": None
        }
    }

    hetero_lr_0 = HeteroLR(name="hetero_lr_0", **param)

    # define Scorecard component
    # Only the guest produces credit scores; the host side is disabled.
    scorecard_0 = Scorecard(name="scorecard_0")
    scorecard_0.get_party_instance(
        role="guest", party_id=guest).component_param(need_run=True,
                                                      method="credit",
                                                      offset=500,
                                                      factor=20,
                                                      factor_base=2,
                                                      upper_limit_ratio=3,
                                                      lower_limit_value=0)
    scorecard_0.get_party_instance(
        role="host", party_id=host).component_param(need_run=False)

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    # set data input sources of intersection components
    pipeline.add_component(intersection_0,
                           data=Data(data=dataio_0.output.data))

    pipeline.add_component(hetero_lr_0,
                           data=Data(train_data=intersection_0.output.data))

    # Scorecard consumes the LR prediction output.
    pipeline.add_component(scorecard_0,
                           data=Data(data=hetero_lr_0.output.data))

    # compile pipeline once finished adding modules, this step will form conf and dsl files for running job
    pipeline.compile()

    # fit model
    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)
コード例 #16
0
def main(config="../../config.yaml", namespace=""):
    """Train a hetero Poisson regression on the dvisits dataset, continue
    training from its model in a second component, then evaluate the
    result as a regression task.

    :param config: path to a job config yaml, or an already-loaded config
    :param namespace: suffix appended to the data namespace
    """
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    guest_train_data = {
        "name": "dvisits_hetero_guest",
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": "dvisits_hetero_host",
        "namespace": f"experiment{namespace}"
    }

    pipeline = PipeLine()
    pipeline.set_initiator(role='guest', party_id=guest)
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)

    # Guest holds the float label "doctorco"; host data is unlabeled.
    data_transform_0 = DataTransform(name="data_transform_0")
    data_transform_0.get_party_instance(
        role='guest', party_id=guest).component_param(with_label=True,
                                                      label_name="doctorco",
                                                      label_type="float",
                                                      output_format="dense")
    data_transform_0.get_party_instance(
        role='host', party_id=host).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")

    # Hyper-parameters shared by both Poisson components.
    shared_poisson_kwargs = dict(
        early_stop="weight_diff",
        alpha=100.0,
        batch_size=-1,
        learning_rate=0.01,
        optimizer="rmsprop",
        exposure_colname="exposure",
        decay_sqrt=False,
        tol=0.001,
        penalty="L2",
        encrypted_mode_calculator_param={"mode": "fast"})

    # First run: 3 iterations, zero-initialized, with model checkpoints.
    hetero_poisson_0 = HeteroPoisson(
        name="hetero_poisson_0",
        max_iter=3,
        callback_param={"callbacks": ["ModelCheckpoint"]},
        init_param={"init_method": "zeros"},
        **shared_poisson_kwargs)

    # Second run: warm-started from the first model, up to 10 iterations.
    hetero_poisson_1 = HeteroPoisson(name="hetero_poisson_1",
                                     max_iter=10,
                                     **shared_poisson_kwargs)

    # Regression evaluation runs on the guest only.
    evaluation_0 = Evaluation(name="evaluation_0",
                              eval_type="regression",
                              pos_label=1)
    evaluation_0.get_party_instance(
        role='host', party_id=host).component_param(need_run=False)

    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0,
                           data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0,
                           data=Data(data=data_transform_0.output.data))
    pipeline.add_component(hetero_poisson_0,
                           data=Data(train_data=intersection_0.output.data))
    pipeline.add_component(hetero_poisson_1,
                           data=Data(train_data=intersection_0.output.data),
                           model=Model(model=hetero_poisson_0.output.model))
    pipeline.add_component(evaluation_0,
                           data=Data(data=hetero_poisson_1.output.data))

    pipeline.compile()

    pipeline.fit()
コード例 #17
0
ファイル: pipeline-lr-multi.py プロジェクト: wzxJayce/FATE
def main(config="../../config.yaml", param="./vechile_config.yaml", namespace=""):
    """Benchmark-style hetero LR multi-class job on the vehicle-scale
    dataset, parameterized by an external param yaml.

    :param config: path to a job config yaml, or an already-loaded config
    :param param: path to a param yaml (or a pre-loaded dict) holding the
        LR hyper-parameters and the guest data file name
    :param namespace: suffix appended to the data namespace
    :return: (data_summary, result_summary) describing the tables used and
        the evaluation summary
    :raises ValueError: if the data set named in ``param`` is unrecognized
    """
    # NOTE(review): this example uses `algorithm_param` and
    # `fit(backend=..., work_mode=...)`, an older pipeline API than the
    # `component_param` / JobParameters style used elsewhere in this file —
    # confirm the FATE version this example targets before modernizing.
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]
    backend = config.backend
    work_mode = config.work_mode

    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    assert isinstance(param, dict)
    """
    guest = 9999
    host = 10000
    arbiter = 9999
    backend = 0
    work_mode = 1
    param = {"penalty": "L2", "max_iter": 5}
    """
    # Map the configured guest data file onto the uploaded table names.
    data_set = param.get("data_guest").split('/')[-1]
    if data_set == "vehicle_scale_hetero_guest.csv":
        guest_data_table = 'vehicle_scale_hetero_guest'
        host_data_table = 'vehicle_scale_hetero_host'
    else:
        raise ValueError(f"Cannot recognized data_set: {data_set}")

    guest_train_data = {"name": guest_data_table, "namespace": f"experiment{namespace}"}
    host_train_data = {"name": host_data_table, "namespace": f"experiment{namespace}"}

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    # configure Reader for guest
    reader_0.get_party_instance(role='guest', party_id=guest).algorithm_param(table=guest_train_data)
    # configure Reader for host
    reader_0.get_party_instance(role='host', party_id=host).algorithm_param(table=host_train_data)

    # define DataIO components
    dataio_0 = DataIO(name="dataio_0")  # start component numbering at 0

    # get DataIO party instance of guest
    dataio_0_guest_party_instance = dataio_0.get_party_instance(role='guest', party_id=guest)
    # configure DataIO for guest
    dataio_0_guest_party_instance.algorithm_param(with_label=True, output_format="dense")
    # get and configure DataIO party instance of host
    dataio_0.get_party_instance(role='host', party_id=host).algorithm_param(with_label=False)

    # define Intersection component
    intersection_0 = Intersection(name="intersection_0")

    # Base LR settings, overridden/extended by values from the param file.
    lr_param = {
        "validation_freqs": None,
        "early_stopping_rounds": None,
    }

    config_param = {
        "penalty": param["penalty"],
        "max_iter": param["max_iter"],
        "alpha": param["alpha"],
        "learning_rate": param["learning_rate"],
        "optimizer": param["optimizer"],
        "batch_size": param["batch_size"],
        "early_stop": "diff",
        "init_param": {
            "init_method": param.get("init_method", 'random_uniform')
        }
    }
    lr_param.update(config_param)
    print(f"lr_param: {lr_param}, data_set: {data_set}")
    hetero_lr_0 = HeteroLR(name='hetero_lr_0', **lr_param)

    evaluation_0 = Evaluation(name='evaluation_0', eval_type="multi")

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(hetero_lr_0, data=Data(train_data=intersection_0.output.data))
    pipeline.add_component(evaluation_0, data=Data(data=hetero_lr_0.output.data))

    # compile pipeline once finished adding modules, this step will form conf and dsl files for running job
    pipeline.compile()

    # fit model
    pipeline.fit(backend=backend, work_mode=work_mode)
    # query component summary
    print(pipeline.get_component("evaluation_0").get_summary())
    result_summary = pipeline.get_component("evaluation_0").get_summary()
    # Train and test summaries reference the same tables in this example.
    data_summary = {"train": {"guest": guest_train_data["name"], "host": host_train_data["name"]},
                    "test": {"guest": guest_train_data["name"], "host": host_train_data["name"]}
                    }
    return data_summary, result_summary
コード例 #18
0
def main(config="../../config.yaml", namespace=""):
    """Train a homo SecureBoost classifier on the ionosphere dataset with
    a validation split, then run binary evaluation on its output.

    :param config: path to a job config yaml, or an already-loaded config
    :param namespace: suffix appended to the data namespace
    """
    if isinstance(config, str):
        config = load_job_config(config)

    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    backend = config.backend
    work_mode = config.work_mode

    # Train and validation sets reuse the same tables on each party.
    guest_train_data = {"name": "ionosphere_scale_guest", "namespace": f"experiment{namespace}"}
    guest_validate_data = {"name": "ionosphere_scale_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "ionosphere_scale_host", "namespace": f"experiment{namespace}"}
    host_validate_data = {"name": "ionosphere_scale_host", "namespace": f"experiment{namespace}"}

    pipeline = PipeLine().set_initiator(
        role='guest',
        party_id=guest).set_roles(guest=guest, host=host, arbiter=arbiter)

    reader_0 = Reader(name="reader_0")
    reader_1 = Reader(name='reader_1')
    dataio_0 = DataIO(name="dataio_0")
    dataio_1 = DataIO(name='dataio_1')

    # In the homo setting both parties hold labeled data.
    labeled_dense = dict(with_label=True, output_format="dense",
                         label_name="label")

    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)
    dataio_0.get_party_instance(role='guest', party_id=guest).component_param(**labeled_dense)
    dataio_0.get_party_instance(role='host', party_id=host).component_param(**labeled_dense)

    reader_1.get_party_instance(role='guest', party_id=guest).component_param(table=guest_validate_data)
    reader_1.get_party_instance(role='host', party_id=host).component_param(table=host_validate_data)
    dataio_1.get_party_instance(role='guest', party_id=guest).component_param(**labeled_dense)
    dataio_1.get_party_instance(role='host', party_id=host).component_param(**labeled_dense)

    # Three-tree classifier; missing values handled at both the boosting
    # and tree level; validation metrics computed every iteration.
    homo_secureboost_0 = HomoSecureBoost(
        name="homo_secureboost_0",
        num_trees=3,
        task_type='classification',
        objective_param={"objective": "cross_entropy"},
        use_missing=True,
        tree_param={
            "max_depth": 3,
            "use_missing": True
        },
        validation_freqs=1)

    evaluation_0 = Evaluation(name='evaluation_0', eval_type='binary')

    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(reader_1)
    # dataio_1 replicates the transform model fitted by dataio_0.
    pipeline.add_component(dataio_1,
                           data=Data(data=reader_1.output.data),
                           model=Model(dataio_0.output.model))
    pipeline.add_component(homo_secureboost_0,
                           data=Data(train_data=dataio_0.output.data,
                                     validate_data=dataio_1.output.data))
    pipeline.add_component(evaluation_0, data=Data(homo_secureboost_0.output.data))

    pipeline.compile()
    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)
コード例 #19
0
def main(config="../../config.yaml", namespace=""):
    """Homo LR pipeline on the (non-scaled) heart dataset: one-hot encode
    categorical columns, standard-scale features, train on the train set
    and predict on the eval set, then run binary evaluation.

    :param config: path to a job config yaml, or an already-loaded config
    :param namespace: suffix appended to the data namespace
    """
    # NOTE(review): this example uses `algorithm_param` and
    # `fit(backend=..., work_mode=...)`, an older pipeline API than the
    # `component_param` / JobParameters style used elsewhere in this file —
    # confirm the FATE version this example targets before modernizing.
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]
    backend = config.backend
    work_mode = config.work_mode

    guest_train_data = {
        "name": "heart_nonscaled_hetero_guest",
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": "heart_nonscaled_hetero_host",
        "namespace": f"experiment{namespace}"
    }

    guest_eval_data = {
        "name": "heart_nonscaled_hetero_test",
        "namespace": f"experiment{namespace}"
    }
    host_eval_data = {
        "name": "heart_nonscaled_hetero_test",
        "namespace": f"experiment{namespace}"
    }

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    # configure Reader for guest
    reader_0.get_party_instance(
        role='guest', party_id=guest).algorithm_param(table=guest_train_data)
    # configure Reader for host
    reader_0.get_party_instance(
        role='host', party_id=host).algorithm_param(table=host_train_data)

    # Second reader supplies the evaluation (test) tables.
    reader_1 = Reader(name="reader_1")
    reader_1.get_party_instance(
        role='guest', party_id=guest).algorithm_param(table=guest_eval_data)
    reader_1.get_party_instance(
        role='host', party_id=host).algorithm_param(table=host_eval_data)

    # define DataIO components
    dataio_0 = DataIO(name="dataio_0",
                      with_label=True,
                      output_format="dense",
                      label_name='target')  # start component numbering at 0
    dataio_1 = DataIO(name="dataio_1")

    # One-hot encode the listed categorical column indexes, aligned across
    # parties (homo setting).
    homo_onehot_param = {
        "transform_col_indexes": [1, 2, 5, 6, 8, 10, 11, 12],
        "transform_col_names": [],
        "need_alignment": True
    }

    homo_onehot_0 = HomoOneHotEncoder(name='homo_onehot_0',
                                      **homo_onehot_param)
    homo_onehot_1 = HomoOneHotEncoder(name='homo_onehot_1')

    scale_0 = FeatureScale(name='scale_0', method="standard_scale")
    scale_1 = FeatureScale(name='scale_1')

    # Homo LR training hyper-parameters (cv disabled via need_cv=False).
    homo_lr_param = {
        "penalty": "L2",
        "optimizer": "sgd",
        "tol": 1e-05,
        "alpha": 0.01,
        "max_iter": 3,
        "early_stop": "diff",
        "batch_size": 500,
        "learning_rate": 0.15,
        "decay": 1,
        "decay_sqrt": True,
        "init_param": {
            "init_method": "zeros"
        },
        "encrypt_param": {
            "method": "Paillier"
        },
        "cv_param": {
            "n_splits": 4,
            "shuffle": True,
            "random_seed": 33,
            "need_cv": False
        }
    }

    homo_lr_0 = HomoLR(name='homo_lr_0', **homo_lr_param)
    homo_lr_1 = HomoLR(name='homo_lr_1')

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    # set dataio_1 to replicate model from dataio_0
    pipeline.add_component(dataio_1,
                           data=Data(data=reader_1.output.data),
                           model=Model(dataio_0.output.model))

    # Each *_1 component reuses the model fitted by its *_0 counterpart so
    # the eval data gets identical preprocessing.
    pipeline.add_component(homo_onehot_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(homo_onehot_1,
                           data=Data(data=dataio_1.output.data),
                           model=Model(homo_onehot_0.output.model))
    pipeline.add_component(scale_0, data=Data(data=homo_onehot_0.output.data))
    pipeline.add_component(scale_1,
                           data=Data(data=homo_onehot_1.output.data),
                           model=Model(scale_0.output.model))
    pipeline.add_component(homo_lr_0,
                           data=Data(train_data=scale_0.output.data))
    pipeline.add_component(homo_lr_1,
                           data=Data(test_data=scale_1.output.data),
                           model=Model(homo_lr_0.output.model))
    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")
    evaluation_0.get_party_instance(
        role='host', party_id=host).algorithm_param(need_run=False)
    pipeline.add_component(
        evaluation_0,
        data=Data(data=[homo_lr_0.output.data, homo_lr_1.output.data]))
    pipeline.compile()

    # fit model
    pipeline.fit(backend=backend, work_mode=work_mode)
    # query component summary
    print(
        json.dumps(pipeline.get_component("homo_lr_0").get_summary(),
                   indent=4,
                   ensure_ascii=False))
    print(
        json.dumps(pipeline.get_component("evaluation_0").get_summary(),
                   indent=4,
                   ensure_ascii=False))
def main(config="../../config.yaml", namespace=""):
    """Hetero LR pipeline on the breast dataset with per-sample class
    weights, quantile feature binning, IV-based feature selection, and
    standard scaling ahead of training and binary evaluation.

    :param config: path to a job config yaml, or an already-loaded config
    :param namespace: suffix appended to the data namespace
    """
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]
    backend = config.backend
    work_mode = config.work_mode

    guest_train_data = {
        "name": "breast_hetero_guest",
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": "breast_hetero_host",
        "namespace": f"experiment{namespace}"
    }

    pipeline = PipeLine().set_initiator(
        role='guest', party_id=guest).set_roles(guest=guest,
                                                host=host,
                                                arbiter=arbiter)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)

    # Guest data carries the int label "y"; host data is unlabeled.
    dataio_0 = DataIO(name="dataio_0")
    dataio_0.get_party_instance(role='guest', party_id=guest).component_param(
        with_label=True,
        label_name="y",
        label_type="int",
        output_format="dense")
    dataio_0.get_party_instance(
        role='host', party_id=host).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")

    # Quantile binning over all columns; output transformed to bin numbers.
    binning_param = {
        "name": 'hetero_feature_binning_0',
        "method": "quantile",
        "compress_thres": 10000,
        "head_size": 10000,
        "error": 0.001,
        "bin_num": 10,
        "bin_indexes": -1,
        "bin_names": None,
        "category_indexes": None,
        "category_names": None,
        "adjustment_factor": 0.5,
        "local_only": False,
        "transform_param": {
            "transform_cols": -1,
            "transform_names": None,
            "transform_type": "bin_num"
        }
    }

    # Keep features whose IV value exceeds the threshold.
    selection_param = {
        "name": "hetero_feature_selection_0",
        "select_col_indexes": -1,
        "select_names": [],
        "filter_methods": ["iv_value_thres"],
        "iv_value_param": {
            "value_threshold": 0.1
        }
    }
    hetero_feature_binning_0 = HeteroFeatureBinning(**binning_param)

    hetero_feature_selection_0 = HeteroFeatureSelection(**selection_param)

    # Weight class "1" samples twice as heavily; guest side only.
    sample_weight_0 = SampleWeight(name="sample_weight_0")
    sample_weight_0.get_party_instance(
        role='guest', party_id=guest).component_param(need_run=True,
                                                      class_weight={
                                                          "0": 1,
                                                          "1": 2
                                                      })
    sample_weight_0.get_party_instance(
        role='host', party_id=host).component_param(need_run=False)

    feature_scale_0 = FeatureScale(name="feature_scale_0",
                                   method="standard_scale",
                                   need_run=True)

    hetero_lr_0 = HeteroLR(name="hetero_lr_0",
                           optimizer="nesterov_momentum_sgd",
                           tol=0.001,
                           alpha=0.01,
                           max_iter=20,
                           early_stop="weight_diff",
                           batch_size=-1,
                           learning_rate=0.15,
                           init_param={"init_method": "zeros"})

    evaluation_0 = Evaluation(name="evaluation_0",
                              eval_type="binary",
                              pos_label=1)
    # evaluation_0.get_party_instance(role='host', party_id=host).component_param(need_run=False)

    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0,
                           data=Data(data=dataio_0.output.data))
    pipeline.add_component(sample_weight_0,
                           data=Data(data=intersection_0.output.data))
    pipeline.add_component(hetero_feature_binning_0,
                           data=Data(data=sample_weight_0.output.data))
    # Selection consumes binning output data and its model as an
    # isometric (metrics-providing) model.
    pipeline.add_component(
        hetero_feature_selection_0,
        data=Data(data=hetero_feature_binning_0.output.data),
        model=Model(isometric_model=[hetero_feature_binning_0.output.model]))
    pipeline.add_component(feature_scale_0,
                           data=Data(hetero_feature_selection_0.output.data))
    pipeline.add_component(hetero_lr_0,
                           data=Data(train_data=feature_scale_0.output.data))
    pipeline.add_component(evaluation_0,
                           data=Data(data=hetero_lr_0.output.data))

    pipeline.compile()

    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)
コード例 #21
0
def main():
    """End-to-end hetero logistic-regression example with hard-coded parties.

    Trains a guest/host/arbiter HeteroLR model on the breast dataset,
    prints the training summary as JSON, then deploys the trained
    components and runs a prediction job with an Evaluation step appended.

    NOTE(review): this snippet uses the legacy ``algorithm_param`` and
    ``fit(backend=..., work_mode=...)`` API; neighboring examples in this
    file use ``component_param`` and ``JobParameters`` — confirm the
    target FATE version before reuse.
    """
    # parties config
    guest = 9999
    host = 10000
    arbiter = 10000
    # 0 for eggroll, 1 for spark
    backend = Backend.EGGROLL
    # 0 for standalone, 1 for cluster
    work_mode = WorkMode.STANDALONE
    # use the work mode below for cluster deployment
    # work_mode = WorkMode.CLUSTER

    # specify input data name & namespace in database
    guest_train_data = {
        "name": "breast_hetero_guest",
        "namespace": "experiment"
    }
    host_train_data = {"name": "breast_hetero_host", "namespace": "experiment"}

    guest_eval_data = {
        "name": "breast_hetero_guest",
        "namespace": "experiment"
    }
    host_eval_data = {"name": "breast_hetero_host", "namespace": "experiment"}

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role="guest", party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    # configure Reader for guest
    reader_0.get_party_instance(
        role="guest", party_id=guest).algorithm_param(table=guest_train_data)
    # configure Reader for host
    reader_0.get_party_instance(
        role="host", party_id=host).algorithm_param(table=host_train_data)

    # define DataIO component
    dataio_0 = DataIO(name="dataio_0")

    # get DataIO party instance of guest
    dataio_0_guest_party_instance = dataio_0.get_party_instance(role="guest",
                                                                party_id=guest)
    # configure DataIO for guest; only guest data carries labels
    dataio_0_guest_party_instance.algorithm_param(with_label=True,
                                                  output_format="dense")
    # get and configure DataIO party instance of host
    dataio_0.get_party_instance(
        role="host", party_id=host).algorithm_param(with_label=False)

    # define Intersection components
    intersection_0 = Intersection(name="intersection_0")

    # define HeteroLR component
    hetero_lr_0 = HeteroLR(name="hetero_lr_0",
                           early_stop="diff",
                           learning_rate=0.15,
                           optimizer="rmsprop",
                           max_iter=10)

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    # set data input sources of intersection components
    pipeline.add_component(intersection_0,
                           data=Data(data=dataio_0.output.data))
    # set train & validate data of hetero_lr_0 component
    pipeline.add_component(hetero_lr_0,
                           data=Data(train_data=intersection_0.output.data))

    # compile pipeline once finished adding modules, this step will form conf and dsl files for running job
    pipeline.compile()

    # fit model
    pipeline.fit(backend=backend, work_mode=work_mode)
    # query component summary
    import json
    print(
        json.dumps(pipeline.get_component("hetero_lr_0").get_summary(),
                   indent=4))

    # predict
    # deploy required components
    pipeline.deploy_component([dataio_0, intersection_0, hetero_lr_0])

    # initiate predict pipeline
    predict_pipeline = PipeLine()

    # define new data reader
    reader_1 = Reader(name="reader_1")
    reader_1.get_party_instance(
        role="guest", party_id=guest).algorithm_param(table=guest_eval_data)
    reader_1.get_party_instance(
        role="host", party_id=host).algorithm_param(table=host_eval_data)

    # define evaluation component; host side skips it (no labels there)
    evaluation_0 = Evaluation(name="evaluation_0")
    evaluation_0.get_party_instance(
        role="guest", party_id=guest).algorithm_param(need_run=True,
                                                      eval_type="binary")
    evaluation_0.get_party_instance(
        role="host", party_id=host).algorithm_param(need_run=False)

    # add data reader onto predict pipeline
    predict_pipeline.add_component(reader_1)
    # add selected components from train pipeline onto predict pipeline
    # specify data source: reader_1 output replaces dataio_0's training input
    predict_pipeline.add_component(
        pipeline,
        data=Data(
            predict_input={pipeline.dataio_0.input.data: reader_1.output.data
                           }))
    # add evaluation component to predict pipeline
    predict_pipeline.add_component(
        evaluation_0, data=Data(data=pipeline.hetero_lr_0.output.data))
    # run predict model
    predict_pipeline.predict(backend=backend, work_mode=work_mode)
Code example #22
0
def main(config="../../config.yaml", namespace=""):
    """Hetero K-means example: fit one model with explicit k/max_iter and a
    second with defaults, each followed by a clustering Evaluation.

    Parameters
    ----------
    config : str or loaded config
        Path to the job config yaml, or an already-loaded config object.
    namespace : str
        Suffix appended to data namespaces so parallel runs do not collide.
    """
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]
    backend = config.backend
    work_mode = config.work_mode

    guest_train_data = {
        "name": "breast_hetero_guest",
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": "breast_hetero_host",
        "namespace": f"experiment{namespace}"
    }

    guest_eval_data = {
        "name": "breast_hetero_guest",
        "namespace": f"experiment{namespace}"
    }
    host_eval_data = {
        "name": "breast_hetero_host",
        "namespace": f"experiment{namespace}"
    }

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    # configure Reader for guest
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    # configure Reader for host
    reader_0.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)

    reader_1 = Reader(name="reader_1")
    reader_1.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_eval_data)
    reader_1.get_party_instance(
        role='host', party_id=host).component_param(table=host_eval_data)

    # define DataIO components
    dataio_0 = DataIO(name="dataio_0")  # start component numbering at 0
    dataio_1 = DataIO(name="dataio_1")

    # get DataIO party instance of guest
    dataio_0_guest_party_instance = dataio_0.get_party_instance(role='guest',
                                                                party_id=guest)
    # configure DataIO for guest; only guest data carries labels
    dataio_0_guest_party_instance.component_param(with_label=True,
                                                  output_format="dense")
    # get and configure DataIO party instance of host
    dataio_0.get_party_instance(
        role='host', party_id=host).component_param(with_label=False)

    # define Intersection components
    intersection_0 = Intersection(name="intersection_0")
    intersection_1 = Intersection(name="intersection_1")

    param = {"k": 3, "max_iter": 10}

    hetero_kmeans_0 = HeteroKmeans(name='hetero_kmeans_0', **param)
    hetero_kmeans_1 = HeteroKmeans(name='hetero_kmeans_1')
    evaluation_0 = Evaluation(name='evaluation_0', eval_type='clustering')
    evaluation_1 = Evaluation(name='evaluation_1', eval_type='clustering')

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    # dataio_1 reuses dataio_0's fitted model so both readers are parsed alike
    pipeline.add_component(dataio_1,
                           data=Data(data=reader_1.output.data),
                           model=Model(dataio_0.output.model))
    # set data input sources of intersection components
    pipeline.add_component(intersection_0,
                           data=Data(data=dataio_0.output.data))
    pipeline.add_component(intersection_1,
                           data=Data(data=dataio_1.output.data))
    # set train data of the two hetero-kmeans components

    pipeline.add_component(hetero_kmeans_0,
                           data=Data(train_data=intersection_0.output.data))
    pipeline.add_component(hetero_kmeans_1,
                           data=Data(train_data=intersection_1.output.data))
    # print(f"data: {hetero_kmeans_0.output.data.data[0]}")
    # kmeans exposes multiple data outputs; [0] is fed to evaluation
    # NOTE(review): presumably [0] is the cluster-assignment table — confirm
    pipeline.add_component(evaluation_0,
                           data=Data(data=hetero_kmeans_0.output.data.data[0]))
    pipeline.add_component(evaluation_1,
                           data=Data(data=hetero_kmeans_1.output.data.data[0]))
    # compile pipeline once finished adding modules, this step will form conf and dsl files for running job
    pipeline.compile()

    # fit model
    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)
    # query component summary
    print(pipeline.get_component("hetero_kmeans_0").get_summary())
Code example #23
0
def main(config="../../config.yaml", namespace=""):
    """Build and fit an RSA-intersection job in which the second
    intersection component reuses the cache produced by the first.

    Parameters
    ----------
    config : str or loaded config
        Path to the job config yaml, or an already-loaded config object.
    namespace : str
        Suffix appended to data namespaces so parallel runs do not collide.
    """
    if isinstance(config, str):
        config = load_job_config(config)
    guest = config.parties.guest[0]
    host = config.parties.host[0]

    guest_train_data = {"name": "vehicle_scale_hetero_guest",
                        "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "vehicle_scale_hetero_host",
                       "namespace": f"experiment{namespace}"}
    # NOTE: guest table name intentionally matches the original example
    guest_train_data["name"] = "breast_hetero_guest"
    host_train_data["name"] = "breast_hetero_host"

    pipeline = PipeLine()
    pipeline.set_initiator(role='guest', party_id=guest)
    pipeline.set_roles(guest=guest, host=host)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest) \
        .component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=host) \
        .component_param(table=host_train_data)

    # neither side carries labels for a pure intersection job
    data_transform_0 = DataTransform(name="data_transform_0")
    for role, party in (('guest', guest), ('host', host)):
        data_transform_0.get_party_instance(role=role, party_id=party) \
            .component_param(with_label=False, output_format="dense")

    rsa_conf = {
        "hash_method": "sha256",
        "final_hash_method": "sha256",
        "key_length": 2048
    }
    # first intersection writes a cache; second consumes it and only
    # outputs keys without syncing ids
    intersect_0 = Intersection(name="intersect_0",
                               intersect_method="rsa",
                               rsa_params=dict(rsa_conf),
                               run_cache=True)
    intersect_1 = Intersection(name="intersect_1",
                               intersect_method="rsa",
                               sync_intersect_ids=False,
                               only_output_key=True,
                               rsa_params=dict(rsa_conf))

    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0,
                           data=Data(data=reader_0.output.data))
    pipeline.add_component(intersect_0,
                           data=Data(data=data_transform_0.output.data))
    pipeline.add_component(intersect_1,
                           data=Data(data=data_transform_0.output.data),
                           cache=Cache(intersect_0.output.cache))

    pipeline.compile()

    pipeline.fit()
Code example #24
0
def main(config="../../config.yaml", namespace=""):
    """Hetero fast-SecureBoost training example with a validation reader
    and a final Evaluation step; prints the fitted component summary.

    NOTE(review): this snippet uses the legacy ``algorithm_param`` /
    ``fit(backend=..., work_mode=...)`` API; neighboring examples use
    ``component_param`` and ``JobParameters`` — confirm the target FATE
    version before reuse.
    """
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]

    backend = config.backend
    work_mode = config.work_mode

    # data sets
    guest_train_data = {"name": "breast_hetero_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "breast_hetero_host", "namespace": f"experiment{namespace}"}

    guest_validate_data = {"name": "breast_hetero_guest", "namespace": f"experiment{namespace}"}
    host_validate_data = {"name": "breast_hetero_host", "namespace": f"experiment{namespace}"}

    # init pipeline
    pipeline = PipeLine().set_initiator(role="guest", party_id=guest).set_roles(guest=guest, host=host,)

    # set data reader and data-io

    reader_0, reader_1 = Reader(name="reader_0"), Reader(name="reader_1")
    reader_0.get_party_instance(role="guest", party_id=guest).algorithm_param(table=guest_train_data)
    reader_0.get_party_instance(role="host", party_id=host).algorithm_param(table=host_train_data)
    reader_1.get_party_instance(role="guest", party_id=guest).algorithm_param(table=guest_validate_data)
    reader_1.get_party_instance(role="host", party_id=host).algorithm_param(table=host_validate_data)

    dataio_0, dataio_1 = DataIO(name="dataio_0"), DataIO(name="dataio_1")

    dataio_0.get_party_instance(role="guest", party_id=guest).algorithm_param(with_label=True, output_format="dense")
    dataio_0.get_party_instance(role="host", party_id=host).algorithm_param(with_label=False)
    dataio_1.get_party_instance(role="guest", party_id=guest).algorithm_param(with_label=True, output_format="dense")
    dataio_1.get_party_instance(role="host", party_id=host).algorithm_param(with_label=False)

    # data intersect component
    intersect_0 = Intersection(name="intersection_0")
    intersect_1 = Intersection(name="intersection_1")

    # secure boost component
    # NOTE(review): work_mode='mix' selects the fast-SBT mixed mode with
    # tree_num_per_party trees built per party — confirm against the
    # HeteroFastSecureBoost docs for the deployed FATE version
    hetero_fast_secure_boost_0 = HeteroFastSecureBoost(name="hetero_fast_secure_boost_0",
                                                       num_trees=4,
                                                       tree_num_per_party=1, task_type='classification',
                                                       objective_param={"objective": "cross_entropy"},
                                                       encrypt_param={"method": "iterativeAffine"},
                                                       tree_param={"max_depth": 3},
                                                       validation_freqs=1,
                                                       work_mode='mix')

    # evaluation component
    # NOTE(review): eval_type="multi" although breast data is binary —
    # confirm this is intended
    evaluation_0 = Evaluation(name="evaluation_0", eval_type="multi")

    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    # dataio_1 reuses dataio_0's fitted model so both readers are parsed alike
    pipeline.add_component(dataio_1, data=Data(data=reader_1.output.data), model=Model(dataio_0.output.model))
    pipeline.add_component(intersect_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(intersect_1, data=Data(data=dataio_1.output.data))
    pipeline.add_component(hetero_fast_secure_boost_0, data=Data(train_data=intersect_0.output.data,
                                                                 validate_data=intersect_1.output.data))

    pipeline.add_component(evaluation_0, data=Data(data=hetero_fast_secure_boost_0.output.data))

    pipeline.compile()
    pipeline.fit(backend=backend, work_mode=work_mode)

    print("fitting hetero secureboost done, result:")
    print(pipeline.get_component("hetero_fast_secure_boost_0").get_summary())
Code example #25
0
def main(config="../../config.yaml", namespace=""):
    """SSHE-LinR regression example with guest-side sample weights,
    followed by evaluation and a prediction job reusing the same reader.

    Parameters
    ----------
    config : str or loaded config
        Path to the job config yaml, or an already-loaded config object.
    namespace : str
        Suffix appended to data namespaces so parallel runs do not collide.
    """
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]

    guest_train_data = {
        "name": "motor_hetero_guest",
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": "motor_hetero_host",
        "namespace": f"experiment{namespace}"
    }

    pipeline = PipeLine().set_initiator(role='guest',
                                        party_id=guest).set_roles(guest=guest,
                                                                  host=host)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)

    # guest holds the float regression target "motor_speed"; host has no label
    data_transform_0 = DataTransform(name="data_transform_0")
    data_transform_0.get_party_instance(
        role='guest', party_id=guest).component_param(with_label=True,
                                                      label_name="motor_speed",
                                                      label_type="float",
                                                      output_format="dense")
    data_transform_0.get_party_instance(
        role='host', party_id=host).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")
    # per-sample weights come from guest column "pm"; host skips this step
    sample_weight_0 = SampleWeight(name="sample_weight_0")
    sample_weight_0.get_party_instance(
        role='guest', party_id=guest).component_param(need_run=True,
                                                      sample_weight_name="pm")
    sample_weight_0.get_party_instance(
        role='host', party_id=host).component_param(need_run=False)
    # NOTE(review): reveal_every_iter + reveal_strategy="respectively" expose
    # intermediate model shares each iteration — confirm this is acceptable
    # for the intended privacy setting
    hetero_linr_0 = HeteroSSHELinR(name="hetero_linr_0",
                                   penalty="L2",
                                   optimizer="rmsprop",
                                   tol=0.001,
                                   alpha=0.01,
                                   max_iter=20,
                                   early_stop="weight_diff",
                                   batch_size=-1,
                                   learning_rate=0.15,
                                   decay=0.0,
                                   decay_sqrt=False,
                                   init_param={"init_method": "zeros"},
                                   reveal_every_iter=True,
                                   reveal_strategy="respectively")

    # NOTE(review): pos_label is a classification option; it is presumably
    # ineffective for regression evaluation — confirm
    evaluation_0 = Evaluation(name="evaluation_0",
                              eval_type="regression",
                              pos_label=1)
    # evaluation_0.get_party_instance(role='host', party_id=host).component_param(need_run=False)

    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0,
                           data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0,
                           data=Data(data=data_transform_0.output.data))
    pipeline.add_component(sample_weight_0,
                           data=Data(data=intersection_0.output.data))
    pipeline.add_component(hetero_linr_0,
                           data=Data(train_data=sample_weight_0.output.data))
    pipeline.add_component(evaluation_0,
                           data=Data(data=hetero_linr_0.output.data))

    pipeline.compile()

    pipeline.fit()

    # predict
    # deploy required components
    pipeline.deploy_component(
        [data_transform_0, intersection_0, hetero_linr_0])

    predict_pipeline = PipeLine()
    # add data reader onto predict pipeline
    predict_pipeline.add_component(reader_0)
    # add selected components from train pipeline onto predict pipeline
    # specify data source: reader_0 output feeds the deployed transform input
    predict_pipeline.add_component(
        pipeline,
        data=Data(predict_input={
            pipeline.data_transform_0.input.data: reader_0.output.data
        }))
    # run predict model
    predict_pipeline.predict()
Code example #26
0
File: fate-homo_nn.py  Project: zark7777/FATE
def main(config="../../config.yaml", param="param_conf.yaml", namespace=""):
    """Train a HomoNN model, reload it into a second HomoNN component for
    prediction, and compare the two outputs with distribution metrics.

    Parameters
    ----------
    config : str or loaded config
        Path to the job config yaml, or an already-loaded config object.
    param : str or dict
        Path to the benchmark parameter yaml (epoch, lr, layers, ...) or a
        pre-loaded dict.
    namespace : str
        Suffix appended to data namespaces so parallel runs do not collide.

    Returns
    -------
    tuple(dict, dict)
        (data_summary, metric_summary) for benchmark comparison.
    """
    num_host = 1

    if isinstance(config, str):
        config = load_job_config(config)

    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    epoch = param["epoch"]
    lr = param["lr"]
    batch_size = param.get("batch_size", -1)
    optimizer_name = param.get("optimizer", "Adam")
    encode_label = param.get("encode_label", True)
    loss = param.get("loss", "categorical_crossentropy")
    metrics = param.get("metrics", ["accuracy"])
    layers = param["layers"]
    data = getattr(dataset, param.get("dataset", "vehicle"))

    guest_train_data = data["guest"]
    host_train_data = data["host"][:num_host]
    # append the run namespace so parallel benchmark runs do not collide
    for d in [guest_train_data, *host_train_data]:
        d["namespace"] = f"{d['namespace']}{namespace}"

    hosts = config.parties.host[:num_host]
    pipeline = PipeLine() \
        .set_initiator(role='guest', party_id=config.parties.guest[0]) \
        .set_roles(guest=config.parties.guest[0], host=hosts, arbiter=config.parties.arbiter)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=config.parties.guest[0]).component_param(table=guest_train_data)
    for i in range(num_host):
        reader_0.get_party_instance(role='host', party_id=hosts[i]) \
            .component_param(table=host_train_data[i])

    # horizontal setting: every party holds labels
    dataio_0 = DataIO(name="dataio_0", with_label=True)
    dataio_0.get_party_instance(role='guest', party_id=config.parties.guest[0]) \
        .component_param(with_label=True, output_format="dense")
    dataio_0.get_party_instance(role='host', party_id=hosts).component_param(with_label=True)

    homo_nn_0 = HomoNN(name="homo_nn_0", encode_label=encode_label, max_iter=epoch, batch_size=batch_size,
                       early_stop={"early_stop": "diff", "eps": 0.0})
    for layer_config in layers:
        layer = getattr(tensorflow.keras.layers, layer_config["name"])
        layer_params = layer_config["params"]
        homo_nn_0.add(layer(**layer_params))
    # BUGFIX: compile once after all layers are added (was re-compiled on
    # every loop iteration)
    homo_nn_0.compile(optimizer=getattr(optimizers, optimizer_name)(learning_rate=lr), metrics=metrics,
                      loss=loss)
    homo_nn_1 = HomoNN(name="homo_nn_1")
    # BUGFIX: branch on the defaulted `loss` value; the original read
    # param["loss"] directly, raising KeyError when the key was absent and
    # defeating the .get() default above
    if loss == "categorical_crossentropy":
        eval_type = "multi"
    else:
        eval_type = "binary"
    # BUGFIX: eval_type was hard-coded to "multi", contradicting the branch
    # above and the binary metric block below
    evaluation_0 = Evaluation(name='evaluation_0', eval_type=eval_type, metrics=["accuracy", "precision", "recall"])

    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(homo_nn_0, data=Data(train_data=dataio_0.output.data))
    # homo_nn_1 predicts with homo_nn_0's trained model on the same data
    pipeline.add_component(homo_nn_1, data=Data(test_data=dataio_0.output.data),
                           model=Model(homo_nn_0.output.model))
    pipeline.add_component(evaluation_0, data=Data(data=homo_nn_0.output.data))
    pipeline.compile()
    job_parameters = JobParameters(backend=config.backend, work_mode=config.work_mode)
    pipeline.fit(job_parameters)
    metric_summary = parse_summary_result(pipeline.get_component("evaluation_0").get_summary())
    nn_0_data = pipeline.get_component("homo_nn_0").get_output_data().get("data")
    nn_1_data = pipeline.get_component("homo_nn_1").get_output_data().get("data")
    nn_0_score = extract_data(nn_0_data, "predict_result")
    nn_0_label = extract_data(nn_0_data, "label")
    nn_1_score = extract_data(nn_1_data, "predict_result")
    nn_1_label = extract_data(nn_1_data, "label")
    nn_0_score_label = extract_data(nn_0_data, "predict_result", keep_id=True)
    nn_1_score_label = extract_data(nn_1_data, "predict_result", keep_id=True)
    # compare train-component vs predict-component outputs
    if eval_type == "binary":
        metric_nn = {
            "score_diversity_ratio": classification_metric.Distribution.compute(nn_0_score_label, nn_1_score_label),
            "ks_2samp": classification_metric.KSTest.compute(nn_0_score, nn_1_score),
            "mAP_D_value": classification_metric.AveragePrecisionScore().compute(nn_0_score, nn_1_score, nn_0_label,
                                                                                 nn_1_label)}
        metric_summary["distribution_metrics"] = {"homo_nn": metric_nn}
    elif eval_type == "multi":
        metric_nn = {
            "score_diversity_ratio": classification_metric.Distribution.compute(nn_0_score_label, nn_1_score_label)}
        metric_summary["distribution_metrics"] = {"homo_nn": metric_nn}

    data_summary = dict(
        train={"guest": guest_train_data["name"], **{f"host_{i}": host_train_data[i]["name"] for i in range(num_host)}},
        test={"guest": guest_train_data["name"], **{f"host_{i}": host_train_data[i]["name"] for i in range(num_host)}}
    )
    return data_summary, metric_summary
Code example #27
0
File: fate-linr.py  Project: FederatedAI/FATE
def main(config="../../config.yaml", param="./linr_config.yaml", namespace=""):
    """Fit a HeteroLinR model, validate a second instance loaded from its
    model, and return data/metric summaries for benchmark comparison.

    Parameters
    ----------
    config : str or loaded config
        Path to the job config yaml, or an already-loaded config object.
    param : str or dict
        Path to the benchmark parameter yaml or a pre-loaded dict.
    namespace : str
        Suffix appended to data namespaces so parallel runs do not collide.
    """
    if isinstance(config, str):
        config = load_job_config(config)
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    parties = config.parties
    guest, host, arbiter = parties.guest[0], parties.host[0], parties.arbiter[0]

    guest_train_data = {"name": "motor_hetero_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "motor_hetero_host", "namespace": f"experiment{namespace}"}

    pipeline = PipeLine()
    pipeline.set_initiator(role='guest', party_id=guest)
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    # guest side holds the float regression label named in the param file
    data_transform_0 = DataTransform(name="data_transform_0")
    data_transform_0.get_party_instance(role='guest', party_id=guest).component_param(
        with_label=True, output_format="dense",
        label_name=param["label_name"], label_type="float")
    data_transform_0.get_party_instance(role='host', party_id=host).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")

    # model hyper-parameters pulled from the benchmark param file
    linr_conf = {key: param[key]
                 for key in ("penalty", "max_iter", "optimizer", "learning_rate",
                             "init_param", "batch_size", "alpha")}
    hetero_linr_0 = HeteroLinR(name='hetero_linr_0', **linr_conf)
    # second instance runs prediction with the first one's fitted model
    hetero_linr_1 = HeteroLinR(name='hetero_linr_1')

    evaluation_0 = Evaluation(name='evaluation_0', eval_type="regression",
                              metrics=["r2_score",
                                       "mean_squared_error",
                                       "root_mean_squared_error",
                                       "explained_variance"])

    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(hetero_linr_0, data=Data(train_data=intersection_0.output.data))
    pipeline.add_component(hetero_linr_1, data=Data(test_data=intersection_0.output.data),
                           model=Model(hetero_linr_0.output.model))
    pipeline.add_component(evaluation_0, data=Data(data=hetero_linr_0.output.data))

    # forms conf and dsl files for the job
    pipeline.compile()

    pipeline.fit()

    metric_summary = parse_summary_result(pipeline.get_component("evaluation_0").get_summary())

    predictions = {
        name: extract_data(pipeline.get_component(name).get_output_data().get("data"),
                           "predict_result")
        for name in ("hetero_linr_0", "hetero_linr_1")
    }
    metric_summary["script_metrics"] = {
        "linr_train": regression_metric.Describe().compute(predictions["hetero_linr_0"]),
        "linr_validate": regression_metric.Describe().compute(predictions["hetero_linr_1"]),
    }

    table_names = {"guest": guest_train_data["name"], "host": host_train_data["name"]}
    data_summary = {"train": dict(table_names), "test": dict(table_names)}
    return data_summary, metric_summary
Code example #28
0
def main(config="../../config.yaml", namespace=""):
    """Train hetero-LR with guest-side sample weights and an Evaluation
    step, then deploy the trained components and run a prediction job
    that reuses the same reader.

    Parameters
    ----------
    config : str or loaded config
        Path to the job config yaml, or an already-loaded config object.
    namespace : str
        Suffix appended to data namespaces so parallel runs do not collide.
    """
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest, host, arbiter = parties.guest[0], parties.host[0], parties.arbiter[0]
    backend, work_mode = config.backend, config.work_mode

    guest_train_data = {"name": "breast_hetero_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "breast_hetero_host", "namespace": f"experiment{namespace}"}

    pipeline = (PipeLine()
                .set_initiator(role='guest', party_id=guest)
                .set_roles(guest=guest, host=host, arbiter=arbiter))

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    # only guest data carries the integer label "y"
    dataio_0 = DataIO(name="dataio_0")
    dataio_0.get_party_instance(role='guest', party_id=guest).component_param(
        with_label=True, label_name="y", label_type="int", output_format="dense")
    dataio_0.get_party_instance(role='host', party_id=host).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")

    # per-sample weights come from guest feature "x0"; host skips this step
    sample_weight_0 = SampleWeight(name="sample_weight_0")
    sample_weight_0.get_party_instance(role='guest', party_id=guest).component_param(
        need_run=True, sample_weight_name="x0")
    sample_weight_0.get_party_instance(role='host', party_id=host).component_param(need_run=False)

    lr_conf = dict(optimizer="nesterov_momentum_sgd", tol=0.001, alpha=0.01,
                   max_iter=20, early_stop="weight_diff", batch_size=-1,
                   learning_rate=0.15, init_param={"init_method": "zeros"})
    hetero_lr_0 = HeteroLR(name="hetero_lr_0", **lr_conf)

    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary", pos_label=1)

    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(sample_weight_0, data=Data(data=intersection_0.output.data))
    pipeline.add_component(hetero_lr_0, data=Data(train_data=sample_weight_0.output.data))
    pipeline.add_component(evaluation_0, data=Data(data=hetero_lr_0.output.data))

    pipeline.compile()

    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)

    # predict: deploy the trained components ...
    pipeline.deploy_component([dataio_0, intersection_0, sample_weight_0, hetero_lr_0])

    # ... and feed the same reader output into the deployed dataio input
    predict_pipeline = PipeLine()
    predict_pipeline.add_component(reader_0)
    predict_pipeline.add_component(
        pipeline,
        data=Data(predict_input={pipeline.dataio_0.input.data: reader_0.output.data}))
    predict_pipeline.predict(job_parameters)
def main(config="../../config.yaml", namespace=""):
    """Train a Hetero-SSHE-LR model on the breast dataset and re-run it in predict mode.

    Builds a guest/host federated pipeline (reader -> data transform ->
    intersection -> SSHE LR -> evaluation), fits it, prints component
    summaries, then deploys the fitted components into a prediction
    pipeline fed by the same reader.

    :param config: path to a job config YAML, or an already-loaded config object
    :param namespace: suffix appended to the data namespace (multi-run isolation)
    :return: the fitted training PipeLine
    """
    if isinstance(config, str):
        config = load_job_config(config)
    guest_party = config.parties.guest[0]
    host_party = config.parties.host[0]

    guest_train_data = {"name": "breast_hetero_guest",
                        "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "breast_hetero_host",
                       "namespace": f"experiment{namespace}"}

    # job is initiated by the guest; guest and host both participate
    pipeline = PipeLine() \
        .set_initiator(role='guest', party_id=guest_party) \
        .set_roles(guest=guest_party, host=host_party)

    # each party reads its own table
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest',
                                party_id=guest_party).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host',
                                party_id=host_party).component_param(table=host_train_data)

    # dense output; only the guest side carries labels
    data_transform_0 = DataTransform(name="data_transform_0", output_format='dense')
    data_transform_0.get_party_instance(role='guest',
                                        party_id=guest_party).component_param(with_label=True)
    data_transform_0.get_party_instance(role='host',
                                        party_id=host_party).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")

    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))

    # SSHE LR hyper-parameters; shares are revealed per-party only at the end
    hetero_sshe_lr_0 = HeteroSSHELR(
        name="hetero_sshe_lr_0",
        penalty="L2",
        optimizer="sgd",
        tol=0.0001,
        alpha=0.01,
        max_iter=30,
        early_stop="weight_diff",
        batch_size=-1,
        learning_rate=0.15,
        init_param={"init_method": "zeros", "fit_intercept": False},
        encrypt_param={"key_length": 1024},
        reveal_every_iter=False,
        reveal_strategy="respectively")
    pipeline.add_component(hetero_sshe_lr_0,
                           data=Data(train_data=intersection_0.output.data))

    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")
    pipeline.add_component(evaluation_0,
                           data=Data(data=hetero_sshe_lr_0.output.data))

    pipeline.compile()
    pipeline.fit()

    # dump the training and evaluation summaries
    prettify(pipeline.get_component("hetero_sshe_lr_0").get_summary())
    prettify(pipeline.get_component("evaluation_0").get_summary())

    # expose the fitted components for prediction
    pipeline.deploy_component([data_transform_0, intersection_0, hetero_sshe_lr_0])

    # predict pipeline reuses the reader as the data source for the
    # deployed training components
    predict_pipeline = PipeLine()
    predict_pipeline.add_component(reader_0)
    predict_pipeline.add_component(
        pipeline,
        data=Data(predict_input={pipeline.data_transform_0.input.data: reader_0.output.data}))
    predict_pipeline.predict()

    return pipeline
# コード例 #30 (code example #30)
# 0
def main(config="../../config.yaml", namespace=""):
    """Train a multi-class Homo-LR model on the vehicle_scale dataset.

    Builds a guest/host/arbiter pipeline (reader -> data transform ->
    homo LR), scores the training data again with a second Homo-LR
    component loaded from the first one's model, evaluates both outputs,
    and prints the evaluation summary.

    :param config: path to a job config YAML, or an already-loaded config object
    :param namespace: suffix appended to the data namespace (multi-run isolation)
    :return: the fitted PipeLine
    """
    if isinstance(config, str):
        config = load_job_config(config)
    guest_party = config.parties.guest[0]
    host_party = config.parties.host[0]
    arbiter_party = config.parties.arbiter[0]

    guest_train_data = {"name": "vehicle_scale_homo_guest",
                        "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "vehicle_scale_homo_host",
                       "namespace": f"experiment{namespace}"}

    # guest initiates the job; guest, host and arbiter all participate
    pipeline = PipeLine() \
        .set_initiator(role='guest', party_id=guest_party) \
        .set_roles(guest=guest_party, host=host_party, arbiter=arbiter_party)

    # each party reads its own training table
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest',
                                party_id=guest_party).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host',
                                party_id=host_party).component_param(table=host_train_data)

    # homo setting: every party's data is labelled
    data_transform_0 = DataTransform(name="data_transform_0",
                                     output_format='dense',
                                     with_label=True)

    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))

    # single-iteration run with checkpoint / early-stopping callbacks and
    # plain-text (unencrypted) aggregation
    homo_lr_0 = HomoLR(
        name="homo_lr_0",
        max_iter=1,
        penalty="L2",
        optimizer="sgd",
        tol=1e-05,
        alpha=0.01,
        early_stop="diff",
        batch_size=-1,
        learning_rate=0.15,
        decay=1,
        decay_sqrt=True,
        init_param={"init_method": "zeros"},
        encrypt_param={"method": None},
        cv_param={"n_splits": 4,
                  "shuffle": True,
                  "random_seed": 33,
                  "need_cv": False},
        callback_param={"callbacks": ["ModelCheckpoint", "EarlyStopping"]})
    # second component re-applies homo_lr_0's model to the same data
    homo_lr_1 = HomoLR(name="homo_lr_1")

    pipeline.add_component(homo_lr_0,
                           data=Data(train_data=data_transform_0.output.data))
    pipeline.add_component(homo_lr_1,
                           data=Data(test_data=data_transform_0.output.data),
                           model=Model(model=homo_lr_0.output.model))

    # evaluate both the training output and the reloaded-model output
    evaluation_0 = Evaluation(name="evaluation_0", eval_type="multi")
    pipeline.add_component(
        evaluation_0,
        data=Data(data=[homo_lr_0.output.data, homo_lr_1.output.data]))

    pipeline.compile()
    pipeline.fit()

    prettify(pipeline.get_component("evaluation_0").get_summary())
    return pipeline