def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    backend = config.backend
    work_mode = config.work_mode

    guest_train_data = {"name": "breast_hetero_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "tag_value_1000_140", "namespace": f"experiment{namespace}"}

    pipeline = PipeLine().set_initiator(role='guest', party_id=guest).set_roles(guest=guest, host=host)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    data_transform_0 = DataTransform(name="data_transform_0")
    data_transform_0.get_party_instance(role='guest', party_id=guest).component_param(with_label=True)
    data_transform_0.get_party_instance(role='host', party_id=host).component_param(with_label=False,
                                                                                    input_format="tag",
                                                                                    tag_with_value=True)

    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))

    pipeline.compile()

    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)
Example #2
def main():
    # parties config
    guest = 9999

    # partition for data storage
    partition = 4

    dense_data = {"name": "breast_hetero_guest", "namespace": "experiment"}

    tag_data = {"name": "tag_value_1", "namespace": "experiment"}

    pipeline_upload = PipeLine().set_initiator(
        role="guest", party_id=guest).set_roles(guest=guest)
    # add upload data info
    # path to the csv file to be uploaded, joined from DATA_BASE and the file name
    pipeline_upload.add_upload_data(
        file=os.path.join(DATA_BASE, "examples/data/breast_hetero_guest.csv"),
        table_name=dense_data["name"],  # table name
        namespace=dense_data["namespace"],  # namespace
        head=1,
        partition=partition,  # data info
        id_delimiter=",")  # id delimiter, needed for spark

    pipeline_upload.add_upload_data(file=os.path.join(
        DATA_BASE, "examples/data/tag_value_1000_140.csv"),
                                    table_name=tag_data["name"],
                                    namespace=tag_data["namespace"],
                                    head=0,
                                    partition=partition,
                                    id_delimiter=",")
    # upload all data
    pipeline_upload.upload(drop=1)
    import json
    print(json.dumps(pipeline_upload._upload_conf(), indent=4))
Example #3
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    backend = config.backend
    work_mode = config.work_mode

    guest_train_data = {
        "name": "ionosphere_scale_hetero_guest",
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": "ionosphere_scale_hetero_host",
        "namespace": f"experiment{namespace}"
    }

    pipeline = PipeLine().set_initiator(role='guest',
                                        party_id=guest).set_roles(guest=guest,
                                                                  host=host)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)

    data_transform_0 = DataTransform(name="data_transform_0")
    data_transform_0.get_party_instance(role='guest',
                                        party_id=guest).component_param(
                                            with_label=True,
                                            label_name="LABEL",
                                            missing_fill=True,
                                            missing_fill_method="mean",
                                            outlier_replace=True)
    data_transform_0.get_party_instance(role='host',
                                        party_id=host).component_param(
                                            with_label=False,
                                            missing_fill=True,
                                            missing_fill_method="designated",
                                            default_value=0,
                                            outlier_replace=False)

    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0,
                           data=Data(data=reader_0.output.data))

    pipeline.compile()

    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)
Example #4
def run_homo_nn_pipeline(config, namespace, data: dict, nn_component,
                         num_host):
    if isinstance(config, str):
        config = load_job_config(config)

    guest_train_data = data["guest"]
    host_train_data = data["host"][:num_host]
    for d in [guest_train_data, *host_train_data]:
        d["namespace"] = f"{d['namespace']}{namespace}"

    hosts = config.parties.host[:num_host]
    pipeline = (PipeLine().set_initiator(
        role="guest", party_id=config.parties.guest[0]).set_roles(
            guest=config.parties.guest[0],
            host=hosts,
            arbiter=config.parties.arbiter))

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(
        role="guest", party_id=config.parties.guest[0]).component_param(
            table=guest_train_data)
    for i in range(num_host):
        reader_0.get_party_instance(
            role="host",
            party_id=hosts[i]).component_param(table=host_train_data[i])

    dataio_0 = DataIO(name="dataio_0", with_label=True)
    dataio_0.get_party_instance(
        role="guest", party_id=config.parties.guest[0]).component_param(
            with_label=True, output_format="dense")
    dataio_0.get_party_instance(
        role="host", party_id=hosts).component_param(with_label=True)

    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(nn_component,
                           data=Data(train_data=dataio_0.output.data))
    pipeline.compile()
    job_parameters = JobParameters(backend=config.backend,
                                   work_mode=config.work_mode)
    pipeline.fit(job_parameters)
    print(pipeline.get_component("homo_nn_0").get_summary())
    pipeline.deploy_component([dataio_0, nn_component])

    # predict
    predict_pipeline = PipeLine()
    predict_pipeline.add_component(reader_0)
    predict_pipeline.add_component(
        pipeline,
        data=Data(
            predict_input={pipeline.dataio_0.input.data: reader_0.output.data
                           }),
    )
    # run predict model
    predict_pipeline.predict(job_parameters)
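
The helper above expects a pre-built nn_component. A minimal, hypothetical driver is sketched below; the HomoNN parameters, the Keras layer shape, and the breast_homo table names are assumptions based on the other examples on this page, not part of the original snippet.

def run_homo_nn_example(config="../../config.yaml", namespace=""):
    # hypothetical driver for run_homo_nn_pipeline; assumes HomoNN is exported by
    # pipeline.component and that tensorflow.keras is installed
    from tensorflow.keras import optimizers
    from tensorflow.keras.layers import Dense
    from pipeline.component import HomoNN

    homo_nn_0 = HomoNN(name="homo_nn_0", max_iter=10, batch_size=-1)
    # single sigmoid output over the 30 breast_homo features (assumed feature width)
    homo_nn_0.add(Dense(units=1, input_shape=(30,), activation="sigmoid"))
    homo_nn_0.compile(optimizer=optimizers.Adam(lr=0.05),
                      loss="binary_crossentropy",
                      metrics=["accuracy", "AUC"])

    # table names follow the convention used by the upload examples on this page
    data = {
        "guest": {"name": "breast_homo_guest", "namespace": "experiment"},
        "host": [{"name": "breast_homo_host", "namespace": "experiment"}],
    }
    run_homo_nn_pipeline(config, namespace, data, homo_nn_0, num_host=1)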
Example #5
def main():
    # parties config
    guest = 9999

    # partition for data storage
    partition = 4

    dense_data = {"name": "breast_hetero_guest", "namespace": f"experiment"}

    tag_data = {"name": "tag_value_1", "namespace": f"experiment"}

    pipeline_upload = PipeLine().set_initiator(role="guest", party_id=guest).set_roles(guest=guest)
    # add upload data info
    # original csv file path
    pipeline_upload.add_upload_data(file=os.path.join(DATA_BASE, "examples/data/breast_hetero_guest.csv"),
                                    table_name=dense_data["name"],             # table name
                                    namespace=dense_data["namespace"],         # namespace
                                    head=1, partition=partition)               # data info

    pipeline_upload.add_upload_data(file=os.path.join(DATA_BASE, "examples/data/tag_value_1000_140.csv"),
                                    table_name=tag_data["name"],
                                    namespace=tag_data["namespace"],
                                    head=0, partition=partition)

    # upload all data
    pipeline_upload.upload(drop=1)
Example #6
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    data_base = config.data_base_dir

    # partition for data storage
    partition = 4

    # table name and namespace, used in FATE job configuration
    dense_data = {"name": "breast_hetero_guest", "namespace": f"experiment{namespace}"}
    tag_data = {"name": "tag_value_1", "namespace": f"experiment{namespace}"}

    pipeline_upload = PipeLine().set_initiator(role="guest", party_id=guest).set_roles(guest=guest)

    # add upload data info
    # path to csv file(s) to be uploaded
    pipeline_upload.add_upload_data(file=os.path.join(data_base, "examples/data/breast_hetero_guest.csv"),
                                    table_name=dense_data["name"],             # table name
                                    namespace=dense_data["namespace"],         # namespace
                                    head=1, partition=partition,               # data info
                                    id_delimiter=",",
                                    extend_sid=True)

    pipeline_upload.add_upload_data(file=os.path.join(data_base, "examples/data/tag_value_1000_140.csv"),
                                    table_name=tag_data["name"],
                                    namespace=tag_data["namespace"],
                                    head=0, partition=partition,
                                    id_delimiter=",",
                                    extend_sid=True)
    # upload both tables
    pipeline_upload.upload(drop=1)
Example #7
def main(data_base=DATA_BASE):
    # parties config
    guest = 9999

    # partition for data storage
    partition = 4

    # table name and namespace, used in FATE job configuration
    dense_data = {"name": "breast_hetero_guest", "namespace": f"experiment"}
    tag_data = {"name": "breast_hetero_host", "namespace": f"experiment"}

    pipeline_upload = PipeLine().set_initiator(
        role="guest", party_id=guest).set_roles(guest=guest)
    # add upload data info
    # path to csv file(s) to be uploaded, modify to upload designated data
    # This is an example for the standalone version. For the cluster version, you will need to upload data
    # on each party separately.
    pipeline_upload.add_upload_data(
        file=os.path.join(data_base, "examples/data/breast_hetero_guest.csv"),
        table_name=dense_data["name"],  # table name
        namespace=dense_data["namespace"],  # namespace
        head=1,
        partition=partition)  # data info

    pipeline_upload.add_upload_data(file=os.path.join(
        data_base, "examples/data/breast_hetero_host.csv"),
                                    table_name=tag_data["name"],
                                    namespace=tag_data["namespace"],
                                    head=1,
                                    partition=partition)

    # upload data
    pipeline_upload.upload(drop=1)
Example #8
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    backend = config.backend
    work_mode = config.work_mode
    data_base = config.data_base

    # partition for data storage
    partition = 4

    dense_data = {"name": "breast_hetero_guest", "namespace": f"experiment{namespace}"}

    tag_data = {"name": "tag_value_1", "namespace": f"experiment{namespace}"}

    pipeline_upload = PipeLine().set_initiator(role="guest", party_id=guest).set_roles(guest=guest)
    # add upload data info
    # path to the csv file to be uploaded, joined from the data base path and the file name
    pipeline_upload.add_upload_data(file=os.path.join(data_base, "examples/data/breast_hetero_guest.csv"),
                                    table_name=dense_data["name"],             # table name
                                    namespace=dense_data["namespace"],         # namespace
                                    head=1, partition=partition,               # data info
                                    id_delimiter=",")                          # needed for spark backend

    pipeline_upload.add_upload_data(file=os.path.join(data_base, "examples/data/tag_value_1000_140.csv"),
                                    table_name=tag_data["name"],
                                    namespace=tag_data["namespace"],
                                    head=0, partition=partition,
                                    id_delimiter=",")
    # upload all data
    pipeline_upload.upload(work_mode=work_mode, backend=backend, drop=1)
Example #9
    def test_upload(self):
        upload_pipeline = PipeLine()
        upload_pipeline.add_upload_data(file=self.file,
                                        table_name=self.table_name, namespace=self.job_id)
        upload_pipeline.upload()

        upload_count = session.get_data_table(self.table_name, self.job_id).count()
        return upload_count == self.data_count
Example #10
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]

    guest_train_data = {
        "name": "breast_homo_guest",
        "namespace": f"experiment_sid{namespace}"
    }
    host_train_data = {
        "name": "breast_homo_host",
        "namespace": f"experiment_sid{namespace}"
    }

    pipeline = PipeLine().set_initiator(role='guest',
                                        party_id=guest).set_roles(guest=guest,
                                                                  host=host)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)

    data_transform_0 = DataTransform(name="data_transform_0",
                                     with_match_id=True,
                                     with_label=True)

    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0,
                           data=Data(data=reader_0.output.data))

    pipeline.compile()

    pipeline.fit()
Example #11
def main(data_base=DATA_BASE):
    # parties config
    guest = 9999
    # 0 for eggroll, 1 for spark
    backend = Backend.EGGROLL
    # 0 for standalone, 1 for cluster
    work_mode = WorkMode.STANDALONE
    # use the work mode below for cluster deployment
    # work_mode = WorkMode.CLUSTER

    # partition for data storage
    partition = 4

    # table name and namespace, used in FATE job configuration
    dense_data = {"name": "breast_hetero_guest", "namespace": f"experiment"}
    tag_data = {"name": "breast_hetero_host", "namespace": f"experiment"}

    pipeline_upload = PipeLine().set_initiator(
        role="guest", party_id=guest).set_roles(guest=guest)
    # add upload data info
    # path to csv file(s) to be uploaded, modify to upload designated data
    pipeline_upload.add_upload_data(
        file=os.path.join(data_base, "examples/data/breast_hetero_guest.csv"),
        table_name=dense_data["name"],  # table name
        namespace=dense_data["namespace"],  # namespace
        head=1,
        partition=partition)  # data info

    pipeline_upload.add_upload_data(file=os.path.join(
        data_base, "examples/data/breast_hetero_host.csv"),
                                    table_name=tag_data["name"],
                                    namespace=tag_data["namespace"],
                                    head=1,
                                    partition=partition)

    # upload data
    pipeline_upload.upload(work_mode=work_mode, backend=backend, drop=1)
Example #12
def main():
    # parties config
    guest = 9999
    # 0 for eggroll, 1 for spark
    backend = Backend.EGGROLL
    # 0 for standalone, 1 for cluster
    work_mode = WorkMode.STANDALONE
    # use the work mode below for cluster deployment
    # work_mode = WorkMode.CLUSTER

    # partition for data storage
    partition = 4

    dense_data = {"name": "breast_hetero_guest", "namespace": f"experiment"}

    tag_data = {"name": "tag_value_1", "namespace": f"experiment"}

    pipeline_upload = PipeLine().set_initiator(
        role="guest", party_id=guest).set_roles(guest=guest)
    # add upload data info
    # original csv file path
    pipeline_upload.add_upload_data(
        file=os.path.join(DATA_BASE, "examples/data/breast_hetero_guest.csv"),
        table_name=dense_data["name"],  # table name
        namespace=dense_data["namespace"],  # namespace
        head=1,
        partition=partition)  # data info

    pipeline_upload.add_upload_data(file=os.path.join(
        DATA_BASE, "examples/data/tag_value_1000_140.csv"),
                                    table_name=tag_data["name"],
                                    namespace=tag_data["namespace"],
                                    head=0,
                                    partition=partition)

    # upload all data
    pipeline_upload.upload(work_mode=work_mode, backend=backend, drop=1)
Example #13
def main():
    # parties config
    guest = 9999
    host = 10000
    arbiter = 10000

    # specify input data name & namespace in database
    guest_train_data = {
        "name": "breast_hetero_guest",
        "namespace": "experiment"
    }
    host_train_data = {"name": "breast_hetero_host", "namespace": "experiment"}

    guest_eval_data = {
        "name": "breast_hetero_guest",
        "namespace": "experiment"
    }
    host_eval_data = {"name": "breast_hetero_host", "namespace": "experiment"}

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role="guest", party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    # configure Reader for guest
    reader_0.get_party_instance(
        role="guest", party_id=guest).component_param(table=guest_train_data)
    # configure Reader for host
    reader_0.get_party_instance(
        role="host", party_id=host).component_param(table=host_train_data)

    # define DataTransform component
    data_transform_0 = DataTransform(name="data_transform_0")

    # get DataTransform party instance of guest
    data_transform_0_guest_party_instance = data_transform_0.get_party_instance(
        role="guest", party_id=guest)
    # configure DataTransform for guest
    data_transform_0_guest_party_instance.component_param(
        with_label=True, output_format="dense")
    # get and configure DataTransform party instance of host
    data_transform_0.get_party_instance(
        role="host", party_id=host).component_param(with_label=False)

    # define Intersection components
    intersection_0 = Intersection(name="intersection_0")

    # define HeteroLR component
    hetero_lr_0 = HeteroLR(name="hetero_lr_0",
                           early_stop="diff",
                           learning_rate=0.15,
                           optimizer="rmsprop",
                           max_iter=10)

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0,
                           data=Data(data=reader_0.output.data))
    # set data input sources of intersection components
    pipeline.add_component(intersection_0,
                           data=Data(data=data_transform_0.output.data))
    # set train data of hetero_lr_0 component
    pipeline.add_component(hetero_lr_0,
                           data=Data(train_data=intersection_0.output.data))

    # compile the pipeline once all modules are added; this step forms the conf and dsl files for the job
    pipeline.compile()

    # fit model
    pipeline.fit()
    # query component summary
    import json
    print(
        json.dumps(pipeline.get_component("hetero_lr_0").get_summary(),
                   indent=4))

    # predict
    # deploy required components
    pipeline.deploy_component([data_transform_0, intersection_0, hetero_lr_0])

    # initiate predict pipeline
    predict_pipeline = PipeLine()

    # define new data reader
    reader_1 = Reader(name="reader_1")
    reader_1.get_party_instance(
        role="guest", party_id=guest).component_param(table=guest_eval_data)
    reader_1.get_party_instance(
        role="host", party_id=host).component_param(table=host_eval_data)

    # define evaluation component
    evaluation_0 = Evaluation(name="evaluation_0")
    evaluation_0.get_party_instance(
        role="guest", party_id=guest).component_param(need_run=True,
                                                      eval_type="binary")
    evaluation_0.get_party_instance(
        role="host", party_id=host).component_param(need_run=False)

    # add data reader onto predict pipeline
    predict_pipeline.add_component(reader_1)
    # add selected components from train pipeline onto predict pipeline
    # specify data source
    predict_pipeline.add_component(
        pipeline,
        data=Data(predict_input={
            pipeline.data_transform_0.input.data: reader_1.output.data
        }))
    # add evaluation component to predict pipeline
    predict_pipeline.add_component(
        evaluation_0, data=Data(data=pipeline.hetero_lr_0.output.data))
    # run predict model
    predict_pipeline.predict()
Example #14
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]

    guest_train_data = [{
        "name": "tag_value_1",
        "namespace": f"experiment{namespace}"
    }, {
        "name": "tag_value_2",
        "namespace": f"experiment{namespace}"
    }, {
        "name": "tag_value_3",
        "namespace": f"experiment{namespace}"
    }]

    pipeline = PipeLine().set_initiator(role='guest',
                                        party_id=guest).set_roles(guest=guest)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(
        role='guest',
        party_id=guest).component_param(table=guest_train_data[0])

    reader_1 = Reader(name="reader_1")
    reader_1.get_party_instance(
        role='guest',
        party_id=guest).component_param(table=guest_train_data[1])

    reader_2 = Reader(name="reader_2")
    reader_2.get_party_instance(
        role='guest',
        party_id=guest).component_param(table=guest_train_data[2])

    union_0 = Union(name="union_0",
                    allow_missing=False,
                    keep_duplicate=True,
                    need_run=True)

    data_transform_0 = DataTransform(name="data_transform_0",
                                     input_format="tag",
                                     with_label=False,
                                     tag_with_value=True,
                                     delimitor=",",
                                     output_format="dense")

    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(reader_2)
    pipeline.add_component(
        union_0,
        data=Data(data=[
            reader_0.output.data, reader_1.output.data, reader_2.output.data
        ]))
    pipeline.add_component(data_transform_0,
                           data=Data(data=union_0.output.data))

    pipeline.compile()

    pipeline.fit()
Example #15
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    backend = config.backend
    work_mode = config.work_mode

    guest_train_data = {
        "name": "breast_hetero_guest",
        "namespace": f"experiment{namespace}"
    }

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role="guest", party_id=guest).set_roles(guest=guest)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    # configure Reader for guest
    reader_0.get_party_instance(
        role="guest", party_id=guest).component_param(table=guest_train_data)

    # define ColumnExpand components
    column_expand_0 = ColumnExpand(name="column_expand_0")
    column_expand_0.get_party_instance(
        role="guest", party_id=guest).component_param(
            need_run=True,
            method="manual",
            append_header=["x_0", "x_1", "x_2", "x_3"],
            fill_value=[0, 0.2, 0.5, 1])
    # define DataIO components
    dataio_0 = DataIO(name="dataio_0")  # start component numbering at 0
    # get DataIO party instance of guest
    dataio_0_guest_party_instance = dataio_0.get_party_instance(role="guest",
                                                                party_id=guest)
    # configure DataIO for guest
    dataio_0_guest_party_instance.component_param(with_label=True,
                                                  output_format="dense")

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(column_expand_0,
                           data=Data(data=reader_0.output.data))
    pipeline.add_component(dataio_0,
                           data=Data(data=column_expand_0.output.data))
    # compile the pipeline once all modules are added; this step forms the conf and dsl files for the job
    pipeline.compile()

    # fit model
    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)
Example #16
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    backend = config.backend
    work_mode = config.work_mode

    guest_train_data = {
        "name": "breast_hetero_guest",
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": "breast_hetero_host",
        "namespace": f"experiment{namespace}"
    }

    pipeline = PipeLine().set_initiator(role='guest',
                                        party_id=guest).set_roles(guest=guest,
                                                                  host=host)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)

    dataio_0 = DataIO(name="dataio_0")
    dataio_0.get_party_instance(
        role='guest', party_id=guest).component_param(with_label=True)
    dataio_0.get_party_instance(
        role='host', party_id=host).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")

    hetero_nn_0 = HeteroNN(name="hetero_nn_0",
                           epochs=100,
                           interactive_layer_lr=0.15,
                           batch_size=-1,
                           early_stop="diff",
                           selector_param={"method": "relative"})
    guest_nn_0 = hetero_nn_0.get_party_instance(role='guest', party_id=guest)
    guest_nn_0.add_bottom_model(
        Dense(units=3,
              input_shape=(10, ),
              activation="relu",
              kernel_initializer=initializers.Constant(value=1)))
    guest_nn_0.set_interactve_layer(
        Dense(units=2,
              input_shape=(2, ),
              kernel_initializer=initializers.Constant(value=1)))
    guest_nn_0.add_top_model(
        Dense(units=1,
              input_shape=(2, ),
              activation="sigmoid",
              kernel_initializer=initializers.Constant(value=1)))
    host_nn_0 = hetero_nn_0.get_party_instance(role='host', party_id=host)
    host_nn_0.add_bottom_model(
        Dense(units=3,
              input_shape=(20, ),
              activation="relu",
              kernel_initializer=initializers.Constant(value=1)))
    host_nn_0.set_interactve_layer(
        Dense(units=2,
              input_shape=(2, ),
              kernel_initializer=initializers.Constant(value=1)))
    hetero_nn_0.compile(optimizer=optimizers.SGD(lr=0.15),
                        metrics=["AUC"],
                        loss="binary_crossentropy")

    hetero_nn_1 = HeteroNN(name="hetero_nn_1")

    evaluation_0 = Evaluation(name="evaluation_0")

    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0,
                           data=Data(data=dataio_0.output.data))
    pipeline.add_component(hetero_nn_0,
                           data=Data(train_data=intersection_0.output.data))
    pipeline.add_component(hetero_nn_1,
                           data=Data(test_data=intersection_0.output.data),
                           model=Model(model=hetero_nn_0.output.model))
    pipeline.add_component(evaluation_0,
                           data=Data(data=hetero_nn_0.output.data))

    pipeline.compile()

    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)

    print(hetero_nn_0.get_config(roles={"guest": [guest], "host": [host]}))
    print(pipeline.get_component("hetero_nn_0").get_summary())
Example #17
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    guest_train_data = {
        "name": "breast_homo_guest",
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": "breast_homo_host",
        "namespace": f"experiment{namespace}"
    }

    pipeline = PipeLine().set_initiator(
        role='guest', party_id=guest).set_roles(guest=guest,
                                                host=host,
                                                arbiter=arbiter)
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)

    data_transform_0 = DataTransform(name="data_transform_0",
                                     with_label=True,
                                     output_format="dense",
                                     label_type="int",
                                     label_name="y")

    homo_lr_0 = HomoLR(name="homo_lr_0",
                       penalty="L2",
                       optimizer="sgd",
                       tol=0.0001,
                       alpha=0.01,
                       max_iter=30,
                       batch_size=-1,
                       early_stop="weight_diff",
                       learning_rate=0.15,
                       init_param={"init_method": "zeros"})

    local_baseline_0 = LocalBaseline(name="local_baseline_0",
                                     model_name="LogisticRegression",
                                     model_opts={
                                         "penalty": "l2",
                                         "tol": 0.0001,
                                         "C": 1.0,
                                         "fit_intercept": True,
                                         "solver": "saga",
                                         "max_iter": 2
                                     })
    local_baseline_0.get_party_instance(
        role='guest', party_id=guest).component_param(need_run=True)
    local_baseline_0.get_party_instance(
        role='host', party_id=host).component_param(need_run=False)

    evaluation_0 = Evaluation(name="evaluation_0",
                              eval_type="binary",
                              pos_label=1)
    evaluation_0.get_party_instance(
        role='guest', party_id=guest).component_param(need_run=True)
    evaluation_0.get_party_instance(
        role='host', party_id=host).component_param(need_run=False)

    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0,
                           data=Data(data=reader_0.output.data))
    pipeline.add_component(homo_lr_0,
                           data=Data(train_data=data_transform_0.output.data))
    pipeline.add_component(local_baseline_0,
                           data=Data(train_data=data_transform_0.output.data))
    pipeline.add_component(
        evaluation_0,
        data=Data(data=[homo_lr_0.output.data, local_baseline_0.output.data]))

    pipeline.compile()

    pipeline.fit()

    # predict
    pipeline.deploy_component([data_transform_0, homo_lr_0, local_baseline_0])

    predict_pipeline = PipeLine()
    predict_pipeline.add_component(reader_0)
    predict_pipeline.add_component(
        pipeline,
        data=Data(predict_input={
            pipeline.data_transform_0.input.data: reader_0.output.data
        }))
    predict_pipeline.add_component(
        evaluation_0,
        data=Data(data=[homo_lr_0.output.data, local_baseline_0.output.data]))
    predict_pipeline.predict()
Example #18
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]

    guest_train_data = {
        "name": "breast_hetero_guest",
        "namespace": "experiment"
    }
    guest_test_data = {
        "name": "breast_hetero_guest",
        "namespace": "experiment"
    }
    host_train_data = {
        "name": "breast_hetero_host_tag_value",
        "namespace": "experiment"
    }
    host_test_data = {
        "name": "breast_hetero_host_tag_value",
        "namespace": "experiment"
    }

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=host)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    reader_1 = Reader(name="reader_1")
    # configure Reader for guest
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_1.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_test_data)
    # configure Reader for host
    reader_0.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)
    reader_1.get_party_instance(
        role='host', party_id=host).component_param(table=host_test_data)

    # define DataIO components
    dataio_0 = DataIO(name="dataio_0")  # start component numbering at 0
    dataio_1 = DataIO(name="dataio_1")  # start component numbering at 1

    param = {
        "with_label": True,
        "label_name": "y",
        "label_type": "int",
        "output_format": "dense",
        "missing_fill": True,
        "missing_fill_method": "mean",
        "outlier_replace": False,
        "outlier_replace_method": "designated",
        "outlier_replace_value": 0.66,
        "outlier_impute": "-9999"
    }
    # get DataIO party instance of guest
    dataio_0_guest_party_instance = dataio_0.get_party_instance(role='guest',
                                                                party_id=guest)
    # configure DataIO for guest
    dataio_0_guest_party_instance.component_param(**param)
    # get and configure DataIO party instance of host
    dataio_1.get_party_instance(role='guest',
                                party_id=guest).component_param(**param)

    param = {
        "input_format": "tag",
        "with_label": False,
        "tag_with_value": True,
        "delimitor": ";",
        "output_format": "dense"
    }
    dataio_0.get_party_instance(role='host',
                                party_id=host).component_param(**param)
    dataio_1.get_party_instance(role='host',
                                party_id=host).component_param(**param)

    # define Intersection components
    intersection_0 = Intersection(name="intersection_0",
                                  intersect_method="raw")
    intersection_1 = Intersection(name="intersection_1",
                                  intersect_method="raw")

    param = {
        "name": 'hetero_feature_binning_0',
        "method": 'optimal',
        "optimal_binning_param": {
            "metric_method": "iv",
            "init_bucket_method": "quantile"
        },
        "bin_indexes": -1
    }
    hetero_feature_binning_0 = HeteroFeatureBinning(**param)
    statistic_0 = DataStatistics(name='statistic_0')
    param = {
        "name":
        'hetero_feature_selection_0',
        "filter_methods":
        ["manually", "unique_value", "iv_filter", "statistic_filter"],
        "manually_param": {
            "filter_out_indexes": [1, 2],
            "filter_out_names": ["x2", "x3"]
        },
        "unique_param": {
            "eps": 1e-6
        },
        "iv_param": {
            "metrics": ["iv", "iv"],
            "filter_type": ["top_k", "threshold"],
            "take_high": [True, True],
            "threshold": [10, 0.1]
        },
        "statistic_param": {
            "metrics": ["coefficient_of_variance", "skewness"],
            "filter_type": ["threshold", "threshold"],
            "take_high": [True, False],
            "threshold": [0.001, -0.01]
        },
        "select_col_indexes":
        -1
    }
    hetero_feature_selection_0 = HeteroFeatureSelection(**param)
    hetero_feature_selection_1 = HeteroFeatureSelection(
        name='hetero_feature_selection_1')

    param = {
        "task_type": "classification",
        "learning_rate": 0.1,
        "num_trees": 10,
        "subsample_feature_rate": 0.5,
        "n_iter_no_change": False,
        "tol": 0.0002,
        "bin_num": 50,
        "objective_param": {
            "objective": "cross_entropy"
        },
        "encrypt_param": {
            "method": "paillier"
        },
        "predict_param": {
            "threshold": 0.5
        },
        "tree_param": {
            "max_depth": 2
        },
        "cv_param": {
            "n_splits": 5,
            "shuffle": False,
            "random_seed": 103,
            "need_cv": False
        },
        "validation_freqs": 2,
        "early_stopping_rounds": 5,
        "metrics": ["auc", "ks"]
    }

    hetero_secureboost_0 = HeteroSecureBoost(name='hetero_secureboost_0',
                                             **param)
    evaluation_0 = Evaluation(name='evaluation_0')
    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(dataio_1,
                           data=Data(data=reader_1.output.data),
                           model=Model(dataio_0.output.model))

    # set data input sources of intersection components
    pipeline.add_component(intersection_0,
                           data=Data(data=dataio_0.output.data))
    pipeline.add_component(intersection_1,
                           data=Data(data=dataio_1.output.data))

    pipeline.add_component(hetero_feature_binning_0,
                           data=Data(data=intersection_0.output.data))

    pipeline.add_component(statistic_0,
                           data=Data(data=intersection_0.output.data))

    pipeline.add_component(
        hetero_feature_selection_0,
        data=Data(data=intersection_0.output.data),
        model=Model(isometric_model=[
            hetero_feature_binning_0.output.model, statistic_0.output.model
        ]))
    pipeline.add_component(hetero_feature_selection_1,
                           data=Data(data=intersection_1.output.data),
                           model=Model(
                               hetero_feature_selection_0.output.model))

    # set train & validate data of hetero_secureboost_0 component
    pipeline.add_component(
        hetero_secureboost_0,
        data=Data(train_data=hetero_feature_selection_0.output.data,
                  validate_data=hetero_feature_selection_1.output.data))

    pipeline.add_component(evaluation_0,
                           data=Data(data=hetero_secureboost_0.output.data))
    # compile the pipeline once all modules are added; this step forms the conf and dsl files for the job
    pipeline.compile()

    # fit model
    pipeline.fit()
    # query component summary
    print(pipeline.get_component("hetero_secureboost_0").get_summary())
Example #19
from pipeline.backend.config import Backend, WorkMode
from pipeline.backend.pipeline import PipeLine
from pipeline.component import Reader, DataTransform, Intersection, HeteroSecureBoost, Evaluation
from pipeline.interface import Data
from pipeline.runtime.entity import JobParameters

# table name & namespace in data storage
# data should be uploaded before running a modeling task
guest_train_data = {"name": "breast_hetero_guest", "namespace": "experiment"}
host_train_data = {"name": "breast_hetero_host", "namespace": "experiment"}

# initialize pipeline
# Party ids are indicators of parties involved in federated learning. For standalone mode,
# arbitrary integers can be used as party id.
pipeline = PipeLine().set_initiator(role="guest",
                                    party_id=9999).set_roles(guest=9999,
                                                             host=10000)

# define components

# Reader is a component for loading previously uploaded data. This component is almost always needed.
reader_0 = Reader(name="reader_0")
# In the following way, you can set different parameters for different parties.
reader_0.get_party_instance(
    role="guest", party_id=9999).component_param(table=guest_train_data)
reader_0.get_party_instance(
    role="host", party_id=10000).component_param(table=host_train_data)

# DataTransform performs preprocessing on the raw data, including extracting the label, converting the data format,
# and filling missing values. You may refer to the algorithm list doc for more details.
data_transform_0 = DataTransform(name="data_transform_0", with_label=True)
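
The code above is cut off after data_transform_0. A plausible completion of the training flow, assembled only from the components this example imports (a sketch under those assumptions, not the original author's code), follows.

# host side carries features only, so read it without a label
data_transform_0.get_party_instance(
    role="host", party_id=10000).component_param(with_label=False)

# intersect on sample ids, then train and evaluate a hetero secure boosting model
intersection_0 = Intersection(name="intersection_0")
hetero_secureboost_0 = HeteroSecureBoost(name="hetero_secureboost_0",
                                         num_trees=5,
                                         task_type="classification",
                                         objective_param={"objective": "cross_entropy"})
evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")

pipeline.add_component(reader_0)
pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
pipeline.add_component(intersection_0,
                       data=Data(data=data_transform_0.output.data))
pipeline.add_component(hetero_secureboost_0,
                       data=Data(train_data=intersection_0.output.data))
pipeline.add_component(evaluation_0,
                       data=Data(data=hetero_secureboost_0.output.data))

pipeline.compile()
pipeline.fit(JobParameters(backend=Backend.EGGROLL, work_mode=WorkMode.STANDALONE))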
Example #20
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]
    backend = config.backend
    work_mode = config.work_mode

    guest_train_data = {
        "name": "breast_hetero_guest",
        "namespace": "experiment"
    }
    guest_test_data = {
        "name": "breast_hetero_guest",
        "namespace": "experiment"
    }
    host_train_data = {
        "name": "breast_hetero_host_tag_value",
        "namespace": "experiment"
    }
    host_test_data = {
        "name": "breast_hetero_host_tag_value",
        "namespace": "experiment"
    }

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    reader_1 = Reader(name="reader_1")
    # configure Reader for guest
    reader_0.get_party_instance(
        role='guest', party_id=guest).algorithm_param(table=guest_train_data)
    reader_1.get_party_instance(
        role='guest', party_id=guest).algorithm_param(table=guest_test_data)
    # configure Reader for host
    reader_0.get_party_instance(
        role='host', party_id=host).algorithm_param(table=host_train_data)
    reader_1.get_party_instance(
        role='host', party_id=host).algorithm_param(table=host_test_data)

    # define DataIO components
    dataio_0 = DataIO(name="dataio_0")  # start component numbering at 0
    dataio_1 = DataIO(name="dataio_1")  # start component numbering at 1

    param = {
        "with_label": True,
        "label_name": "y",
        "label_type": "int",
        "output_format": "dense",
        "missing_fill": True,
        "missing_fill_method": "mean",
        "outlier_replace": False,
        "outlier_replace_method": "designated",
        "outlier_replace_value": 0.66,
        "outlier_impute": "-9999"
    }
    # get DataIO party instance of guest
    dataio_0_guest_party_instance = dataio_0.get_party_instance(role='guest',
                                                                party_id=guest)
    # configure DataIO for guest
    dataio_0_guest_party_instance.algorithm_param(**param)
    # get and configure DataIO party instance of host
    dataio_1.get_party_instance(role='guest',
                                party_id=guest).algorithm_param(**param)

    param = {
        "input_format": "tag",
        "with_label": False,
        "tag_with_value": True,
        "delimitor": ";",
        "output_format": "dense"
    }
    dataio_0.get_party_instance(role='host',
                                party_id=host).algorithm_param(**param)
    dataio_1.get_party_instance(role='host',
                                party_id=host).algorithm_param(**param)

    # define Intersection components
    intersection_0 = Intersection(name="intersection_0",
                                  intersect_method="raw")
    intersection_1 = Intersection(name="intersection_1",
                                  intersect_method="raw")

    param = {
        "name": 'hetero_feature_binning_0',
        "method": 'optimal',
        "optimal_binning_param": {
            "metric_method": "iv",
            "init_bucket_method": "quantile"
        },
        "bin_indexes": -1
    }
    hetero_feature_binning_0 = HeteroFeatureBinning(**param)
    statistic_0 = DataStatistics(name='statistic_0')
    param = {
        "name":
        'hetero_feature_selection_0',
        "filter_methods":
        ["manually", "unique_value", "iv_filter", "statistic_filter"],
        "manually_param": {
            "filter_out_indexes": [1, 2],
            "filter_out_names": ["x3", "x4"]
        },
        "unique_param": {
            "eps": 1e-6
        },
        "iv_param": {
            "metrics": ["iv", "iv"],
            "filter_type": ["top_k", "threshold"],
            "take_high": [True, True],
            "threshold": [10, 0.1]
        },
        "statistic_param": {
            "metrics": ["coefficient_of_variance", "skewness"],
            "filter_type": ["threshold", "threshold"],
            "take_high": [True, False],
            "threshold": [0.001, -0.01]
        },
        "select_col_indexes":
        -1
    }
    hetero_feature_selection_0 = HeteroFeatureSelection(**param)
    hetero_feature_selection_1 = HeteroFeatureSelection(
        name='hetero_feature_selection_1')
    param = {"name": "hetero_scale_0", "method": "standard_scale"}
    hetero_scale_0 = FeatureScale(**param)
    hetero_scale_1 = FeatureScale(name='hetero_scale_1')
    param = {
        "penalty": "L2",
        "optimizer": "nesterov_momentum_sgd",
        "tol": 1e-4,
        "alpha": 0.01,
        "max_iter": 5,
        "early_stop": "diff",
        "batch_size": -1,
        "learning_rate": 0.15,
        "init_param": {
            "init_method": "zeros"
        },
        "validation_freqs": None,
        "early_stopping_rounds": None
    }

    hetero_lr_0 = HeteroLR(name='hetero_lr_0', **param)
    evaluation_0 = Evaluation(name='evaluation_0')
    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(dataio_1,
                           data=Data(data=reader_1.output.data),
                           model=Model(dataio_0.output.model))

    # set data input sources of intersection components
    pipeline.add_component(intersection_0,
                           data=Data(data=dataio_0.output.data))
    pipeline.add_component(intersection_1,
                           data=Data(data=dataio_1.output.data))

    # set data input source of the feature binning component
    pipeline.add_component(hetero_feature_binning_0,
                           data=Data(data=intersection_0.output.data))

    pipeline.add_component(statistic_0,
                           data=Data(data=intersection_0.output.data))

    pipeline.add_component(
        hetero_feature_selection_0,
        data=Data(data=intersection_0.output.data),
        model=Model(isometric_model=[
            hetero_feature_binning_0.output.model, statistic_0.output.model
        ]))
    pipeline.add_component(hetero_feature_selection_1,
                           data=Data(data=intersection_1.output.data),
                           model=Model(
                               hetero_feature_selection_0.output.model))

    pipeline.add_component(
        hetero_scale_0, data=Data(data=hetero_feature_selection_0.output.data))
    pipeline.add_component(
        hetero_scale_1,
        data=Data(data=hetero_feature_selection_1.output.data),
        model=Model(hetero_scale_0.output.model))

    # set train & validate data of hetero_lr_0 component
    pipeline.add_component(hetero_lr_0,
                           data=Data(train_data=hetero_scale_0.output.data,
                                     validate_data=hetero_scale_1.output.data))

    pipeline.add_component(evaluation_0,
                           data=Data(data=[hetero_lr_0.output.data]))
    # compile the pipeline once all modules are added; this step forms the conf and dsl files for the job
    pipeline.compile()

    # fit model
    pipeline.fit(backend=backend, work_mode=work_mode)
    # query component summary
    print(pipeline.get_component("hetero_lr_0").get_summary())
Example #21
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    guest_train_data = {
        "name": "breast_homo_guest",
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": "breast_homo_host",
        "namespace": f"experiment{namespace}"
    }

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    # configure Reader for guest
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    # configure Reader for host
    reader_0.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)

    # define DataTransform components
    data_transform_0 = DataTransform(
        name="data_transform_0", with_label=True,
        output_format="dense")  # start component numbering at 0

    homo_binning_0 = HomoFeatureBinning(name='homo_binning_0',
                                        sample_bins=1000,
                                        method="recursive_query")
    homo_binning_1 = HomoFeatureBinning(name='homo_binning_1',
                                        sample_bins=1000)

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0,
                           data=Data(data=reader_0.output.data))
    # set data input sources of the homo binning components
    pipeline.add_component(homo_binning_0,
                           data=Data(data=data_transform_0.output.data))
    pipeline.add_component(homo_binning_1,
                           data=Data(data=data_transform_0.output.data),
                           model=Model(model=homo_binning_0.output.model))

    # compile the pipeline once all modules are added; this step forms the conf and dsl files for the job
    pipeline.compile()

    # fit model
    pipeline.fit()
Example #22
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host
    arbiter = parties.arbiter[0]
    backend = config.backend
    work_mode = config.work_mode

    guest_train_data = {
        "name": "breast_homo_guest",
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": "breast_homo_host",
        "namespace": f"experiment{namespace}"
    }

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    # configure Reader for guest
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    # configure Reader for host
    reader_0.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)

    # define DataIO components
    dataio_0 = DataIO(name="dataio_0", with_label=True,
                      output_format="dense")  # start component numbering at 0

    scale_0 = FeatureScale(name='scale_0')
    param = {
        "penalty": "L2",
        "optimizer": "sgd",
        "tol": 1e-05,
        "alpha": 0.01,
        "max_iter": 30,
        "early_stop": "diff",
        "batch_size": -1,
        "learning_rate": 0.15,
        "decay": 1,
        "decay_sqrt": True,
        "init_param": {
            "init_method": "zeros"
        },
        "encrypt_param": {
            "method": None
        },
        "cv_param": {
            "n_splits": 4,
            "shuffle": True,
            "random_seed": 33,
            "need_cv": False
        }
    }

    homo_lr_0 = HomoLR(name='homo_lr_0', **param)

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    # set data input source of the feature scale component
    pipeline.add_component(scale_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(homo_lr_0,
                           data=Data(train_data=scale_0.output.data))
    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")
    evaluation_0.get_party_instance(
        role='host', party_id=host).component_param(need_run=False)
    pipeline.add_component(evaluation_0, data=Data(data=homo_lr_0.output.data))

    # compile the pipeline once all modules are added; this step forms the conf and dsl files for the job
    pipeline.compile()

    # fit model
    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)
    # query component summary
    print(
        json.dumps(pipeline.get_component("homo_lr_0").get_summary(),
                   indent=4,
                   ensure_ascii=False))
    print(
        json.dumps(pipeline.get_component("evaluation_0").get_summary(),
                   indent=4,
                   ensure_ascii=False))
Example #23
import json
from pipeline.backend.pipeline import PipeLine
from pipeline.component import Reader, DataTransform, Intersection, HeteroSecureBoost, Evaluation
from pipeline.interface import Data

# table name & namespace in data storage
# data should be uploaded before running a modeling task
guest_train_data = {"name": "breast_hetero_guest", "namespace": "experiment"}
host_train_data = {"name": "breast_hetero_host", "namespace": "experiment"}

# initialize pipeline
# Party ids are indicators of parties involved in federated learning. For standalone mode,
# arbitrary integers can be used as party id.
pipeline = PipeLine().set_initiator(role="guest",
                                    party_id=9999).set_roles(guest=9999,
                                                             host=10000)

# define components

# Reader is a component for obtaining the uploaded data. It is needed in almost every job.
reader_0 = Reader(name="reader_0")
# In this way, you can set different parameters for each party.
reader_0.get_party_instance(
    role="guest", party_id=9999).component_param(table=guest_train_data)
reader_0.get_party_instance(
    role="host", party_id=10000).component_param(table=host_train_data)

# DataTransform provides preprocessing for the raw data, including label extraction, data format conversion,
# missing-value filling, and so on. Refer to the algorithm list doc for more details.
data_transform_0 = DataTransform(name="data_transform_0", with_label=True)
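# Hedged continuation sketch: the snippet above is cut off by the source page.
# Based on the components imported above (Intersection, HeteroSecureBoost,
# Evaluation) and the patterns used in the other examples in this collection,
# a typical continuation would look like the following; all parameter values
# are illustrative assumptions, not taken from the original.
data_transform_0.get_party_instance(
    role="host", party_id=10000).component_param(with_label=False)

intersection_0 = Intersection(name="intersection_0")
hetero_secure_boost_0 = HeteroSecureBoost(
    name="hetero_secure_boost_0",
    num_trees=5,
    task_type="classification",
    objective_param={"objective": "cross_entropy"},
    tree_param={"max_depth": 3})
evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")

pipeline.add_component(reader_0)
pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
pipeline.add_component(intersection_0,
                       data=Data(data=data_transform_0.output.data))
pipeline.add_component(hetero_secure_boost_0,
                       data=Data(train_data=intersection_0.output.data))
pipeline.add_component(evaluation_0,
                       data=Data(data=hetero_secure_boost_0.output.data))

pipeline.compile()
pipeline.fit()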
Example No. 24
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    backend = config.backend
    work_mode = config.work_mode

    guest_train_data = {
        "name": "nus_wide_guest",
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": "nus_wide_host",
        "namespace": f"experiment{namespace}"
    }
    pipeline = PipeLine().set_initiator(role='guest',
                                        party_id=guest).set_roles(guest=guest,
                                                                  host=host)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)

    dataio_0 = DataIO(name="dataio_0")
    dataio_0.get_party_instance(role='guest', party_id=guest).component_param(
        with_label=True, output_format="dense")
    dataio_0.get_party_instance(
        role='host', party_id=host).component_param(with_label=False)

    hetero_ftl_0 = HeteroFTL(name='hetero_ftl_0',
                             epochs=10,
                             alpha=1,
                             batch_size=-1,
                             mode='plain')

    hetero_ftl_0.add_nn_layer(
        Dense(units=32,
              activation='sigmoid',
              kernel_initializer=initializers.RandomNormal(stddev=1.0,
                                                           dtype="float32"),
              bias_initializer=initializers.Zeros()))

    hetero_ftl_0.compile(optimizer=optimizers.Adam(lr=0.01))
    evaluation_0 = Evaluation(name='evaluation_0', eval_type="binary")

    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(hetero_ftl_0,
                           data=Data(train_data=dataio_0.output.data))
    pipeline.add_component(evaluation_0,
                           data=Data(data=hetero_ftl_0.output.data))

    pipeline.compile()

    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)
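    # Hedged addition (not in the original snippet): as in the other examples,
    # the evaluation metrics can be inspected through the component summary.
    print(pipeline.get_component("evaluation_0").get_summary())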
    """
Example No. 25
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    guest_train_data = {
        "name": "dvisits_hetero_guest",
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": "dvisits_hetero_host",
        "namespace": f"experiment{namespace}"
    }

    pipeline = PipeLine().set_initiator(
        role='guest', party_id=guest).set_roles(guest=guest,
                                                host=host,
                                                arbiter=arbiter)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)

    data_transform_0 = DataTransform(name="data_transform_0",
                                     output_format="sparse")

    data_transform_0.get_party_instance(
        role='guest', party_id=guest).component_param(with_label=True,
                                                      label_name="doctorco",
                                                      label_type="float")
    data_transform_0.get_party_instance(
        role='host', party_id=host).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")
    hetero_poisson_0 = HeteroPoisson(
        name="hetero_poisson_0",
        early_stop="weight_diff",
        max_iter=2,
        alpha=100.0,
        batch_size=-1,
        learning_rate=0.01,
        exposure_colname="exposure",
        optimizer="rmsprop",
        penalty="L2",
        decay_sqrt=False,
        tol=0.001,
        init_param={"init_method": "zeros"},
    )
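    # Note (added): exposure_colname identifies the column treated as the
    # Poisson exposure (offset) term when fitting the model.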

    evaluation_0 = Evaluation(name="evaluation_0",
                              eval_type="regression",
                              pos_label=1)
    evaluation_0.get_party_instance(
        role='host', party_id=host).component_param(need_run=False)

    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0,
                           data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0,
                           data=Data(data=data_transform_0.output.data))
    pipeline.add_component(hetero_poisson_0,
                           data=Data(train_data=intersection_0.output.data))
    pipeline.add_component(evaluation_0,
                           data=Data(data=hetero_poisson_0.output.data))

    pipeline.compile()

    pipeline.fit()
Example No. 26
def main(config="../../config.yaml", namespace=""):
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    hosts = parties.host[0]

    guest_train_data = {"name": "ionosphere_scale_hetero_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "ionosphere_scale_hetero_host", "namespace": f"experiment{namespace}"}
    # guest_train_data = {"name": "default_credit_hetero_guest", "namespace": f"experiment{namespace}"}
    # host_train_data = {"name": "default_credit_hetero_host", "namespace": f"experiment{namespace}"}

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=hosts)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    # configure Reader for guest
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    # configure Reader for host
    reader_0.get_party_instance(role='host', party_id=hosts).component_param(table=host_train_data)

    data_transform_0 = DataTransform(name="data_transform_0", output_format='dense', missing_fill=False)

    # get DataTransform party instance of guest
    data_transform_0_guest_party_instance = data_transform_0.get_party_instance(role='guest', party_id=guest)
    # configure DataTransform for guest
    data_transform_0_guest_party_instance.component_param(with_label=True, label_name="label")
    # get and configure DataTransform party instance of host
    data_transform_0.get_party_instance(role='host', party_id=hosts).component_param(with_label=False)

    # define Intersection components
    intersection_0 = Intersection(name="intersection_0")

    pipeline.add_component(reader_0)

    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))

    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))

    statistic_param = {
        "name": "statistic_0",
        "statistics": ["95%", "coefficient_of_variance", "stddev"],
        "column_indexes": [1, 2],
        "column_names": ["x3"]
    }
    statistic_0 = DataStatistics(**statistic_param)
    pipeline.add_component(statistic_0, data=Data(data=intersection_0.output.data))

    pipeline.compile()

    # fit model
    pipeline.fit()
    # query component summary
    prettify(pipeline.get_component("statistic_0").get_summary())
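# prettify() is used above but never defined in this snippet. A minimal sketch
# of such a helper, modeled on FATE's example utilities (an assumption):
import json

def prettify(response, verbose=True):
    if verbose:
        print(json.dumps(response, indent=4, ensure_ascii=False))
        print()
    return response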
Example No. 27
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]

    backend = config.backend
    work_mode = config.work_mode

    # data sets
    guest_train_data = {
        "name": "student_hetero_guest",
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": "student_hetero_host",
        "namespace": f"experiment{namespace}"
    }

    # init pipeline
    pipeline = PipeLine().set_initiator(role="guest",
                                        party_id=guest).set_roles(
                                            guest=guest,
                                            host=host,
                                        )

    # set data reader and data-io

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(
        role="guest", party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(
        role="host", party_id=host).component_param(table=host_train_data)
    dataio_0 = DataIO(name="dataio_0")
    dataio_0.get_party_instance(role="guest", party_id=guest).component_param(
        with_label=True, output_format="dense", label_type="float")
    dataio_0.get_party_instance(
        role="host", party_id=host).component_param(with_label=False)

    # data intersect component
    intersect_0 = Intersection(name="intersection_0")

    # secure boost component
    hetero_secure_boost_0 = HeteroSecureBoost(
        name="hetero_secure_boost_0",
        num_trees=3,
        task_type="regression",
        objective_param={"objective": "lse"},
        encrypt_param={"method": "iterativeAffine"},
        tree_param={"max_depth": 3},
        validation_freqs=1,
        cv_param={
            "need_cv": True,
            "n_splits": 5,
            "shuffle": False,
            "random_seed": 103
        })
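    # Note (added): with "need_cv": True the component performs 5-fold
    # cross-validation over the training data instead of a single training fit.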

    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersect_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(hetero_secure_boost_0,
                           data=Data(train_data=intersect_0.output.data))

    pipeline.compile()
    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)

    print("fitting hetero secureboost done, result:")
    print(pipeline.get_component("hetero_secure_boost_0").get_summary())
Example No. 28
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    backend = config.backend
    work_mode = config.work_mode

    # specify input data name & namespace in database
    guest_train_data = {
        "name": "breast_hetero_guest",
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": "breast_hetero_host",
        "namespace": f"experiment{namespace}"
    }

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=host)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    # configure Reader for guest
    reader_0.get_party_instance(
        role='guest', party_id=guest).algorithm_param(table=guest_train_data)
    # configure Reader for host
    reader_0.get_party_instance(
        role='host', party_id=host).algorithm_param(table=host_train_data)

    # define DataIO components
    dataio_0 = DataIO(name="dataio_0")

    # get DataIO party instance of guest
    dataio_0_guest_party_instance = dataio_0.get_party_instance(role='guest',
                                                                party_id=guest)
    # configure DataIO for guest
    dataio_0_guest_party_instance.algorithm_param(with_label=True,
                                                  output_format="dense")
    # get and configure DataIO party instance of host
    dataio_0.get_party_instance(
        role='host', party_id=host).algorithm_param(with_label=False)

    # define Intersection components
    intersection_0 = Intersection(name="intersection_0")
    intersection_0.get_party_instance(
        role="guest", party_id=guest).algorithm_param(intersect_method="rsa",
                                                      sync_intersect_ids=True,
                                                      only_output_key=True)
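    # Note (added): sync_intersect_ids=True shares the intersected ids with both
    # parties, and only_output_key=True makes the component output only the id
    # column, without the original feature values.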

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    # set data input sources of intersection components
    pipeline.add_component(intersection_0,
                           data=Data(data=dataio_0.output.data))
    # compile pipeline once finished adding modules, this step will form conf and dsl files for running job
    pipeline.compile()

    # fit model
    pipeline.fit(backend=backend, work_mode=work_mode)
    # query component summary
    print(pipeline.get_component("intersection_0").get_summary())
Example No. 29
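# Hedged addition: the original snippet omits its imports. A plausible import
# block, based on the module paths used elsewhere in this collection
# (pipeline.backend.pipeline, pipeline.component, pipeline.interface); the
# load_job_config path is an assumption:
from pipeline.backend.pipeline import PipeLine
from pipeline.component import (Reader, DataIO, DataTransform, FederatedSample,
                                FeatureScale, HomoFeatureBinning,
                                HomoOneHotEncoder, HomoLR, LocalBaseline,
                                HomoSecureBoost, HeteroFeatureSelection,
                                Evaluation)
from pipeline.interface import Data, Model
from pipeline.utils.tools import load_job_config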
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    guest_train_data = {"name": "breast_homo_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "breast_homo_host", "namespace": f"experiment{namespace}"}

    pipeline = PipeLine().set_initiator(role='guest', party_id=guest).set_roles(guest=guest, host=host, arbiter=arbiter)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    reader_1 = Reader(name="reader_1")
    reader_1.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_1.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    data_transform_0 = DataTransform(name="data_transform_0", with_label=True)
    data_transform_1 = DataTransform(name="data_transform_1")

    federated_sample_0 = FederatedSample(name="federated_sample_0", mode="stratified", method="downsample",
                                         fractions=[[0, 1.0], [1, 1.0]], task_type="homo")

    homo_binning_0 = HomoFeatureBinning(name='homo_binning_0', sample_bins=10, method="recursive_query")
    homo_binning_1 = HomoFeatureBinning(name='homo_binning_1')

    homo_onehot_0 = HomoOneHotEncoder(name='homo_onehot_0', need_alignment=True)
    homo_onehot_1 = HomoOneHotEncoder(name='homo_onehot_1')

    homo_lr_0 = HomoLR(name="homo_lr_0", penalty="L2", tol=0.0001, alpha=1.0,
                       optimizer="rmsprop", max_iter=5)
    homo_lr_1 = HomoLR(name="homo_lr_1")

    local_baseline_0 = LocalBaseline(name="local_baseline_0", model_name="LogisticRegression",
                                     model_opts={"penalty": "l2", "tol": 0.0001, "C": 1.0, "fit_intercept": True,
                                                 "solver": "lbfgs", "max_iter": 5, "multi_class": "ovr"})
    local_baseline_0.get_party_instance(role='guest', party_id=guest).component_param(need_run=True)
    local_baseline_0.get_party_instance(role='host', party_id=host).component_param(need_run=True)
    local_baseline_1 = LocalBaseline(name="local_baseline_1")

    homo_secureboost_0 = HomoSecureBoost(name="homo_secureboost_0", num_trees=3)
    homo_secureboost_1 = HomoSecureBoost(name="homo_secureboost_1", num_trees=3)

    evaluation_0 = Evaluation(name="evaluation_0")
    evaluation_1 = Evaluation(name="evaluation_1")

    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)

    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(data_transform_1, data=Data(data=reader_1.output.data),
                           model=Model(model=data_transform_0.output.model))

    pipeline.add_component(federated_sample_0, data=Data(data=data_transform_0.output.data))

    pipeline.add_component(homo_binning_0, data=Data(data=federated_sample_0.output.data))
    pipeline.add_component(homo_binning_1, data=Data(data=data_transform_1.output.data),
                           model=Model(model=homo_binning_0.output.model))

    pipeline.add_component(homo_onehot_0, data=Data(data=homo_binning_0.output.data))
    pipeline.add_component(homo_onehot_1, data=Data(data=homo_binning_1.output.data),
                           model=Model(model=homo_onehot_0.output.model))

    pipeline.add_component(homo_lr_0, data=Data(data=homo_onehot_0.output.data))
    pipeline.add_component(homo_lr_1, data=Data(data=homo_onehot_1.output.data),
                           model=Model(model=homo_lr_0.output.model))

    pipeline.add_component(local_baseline_0, data=Data(data=homo_onehot_0.output.data))
    pipeline.add_component(local_baseline_1, data=Data(data=homo_onehot_1.output.data),
                           model=Model(model=local_baseline_0.output.model))

    pipeline.add_component(homo_secureboost_0, data=Data(data=homo_onehot_0.output.data))
    pipeline.add_component(homo_secureboost_1, data=Data(data=homo_onehot_1.output.data),
                           model=Model(model=homo_secureboost_0.output.model))

    pipeline.add_component(evaluation_0,
                           data=Data(
                               data=[homo_lr_0.output.data, homo_lr_1.output.data,
                                     local_baseline_0.output.data, local_baseline_1.output.data]))
    pipeline.add_component(evaluation_1,
                           data=Data(
                               data=[homo_secureboost_0.output.data, homo_secureboost_1.output.data]))

    pipeline.compile()

    pipeline.fit()

    print(pipeline.get_component("evaluation_0").get_summary())
    print(pipeline.get_component("evaluation_1").get_summary())
def make_normal_dsl(config, namespace):
    parties = config.parties
    guest = parties.guest[0]
    hosts = parties.host[0]
    arbiter = parties.arbiter[0]
    guest_train_data = {"name": "breast_homo_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "breast_homo_host", "namespace": f"experiment{namespace}"}

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=hosts, arbiter=arbiter)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    # configure Reader for guest
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    # configure Reader for host
    reader_0.get_party_instance(role='host', party_id=hosts).component_param(table=host_train_data)

    # define DataIO components
    dataio_0 = DataIO(name="dataio_0")  # start component numbering at 0

    # get DataIO party instance of guest
    dataio_0_guest_party_instance = dataio_0.get_party_instance(role='guest', party_id=guest)
    # configure DataIO for guest
    dataio_0_guest_party_instance.component_param(with_label=True, output_format="dense")
    # get and configure DataIO party instance of host
    dataio_0.get_party_instance(role='host', party_id=hosts).component_param(with_label=True)

    scale_0 = FeatureScale(name='scale_0')

    homo_sbt_0 = HomoSecureBoost(name="homo_secureboost_0",
                                 num_trees=3,
                                 task_type='classification',
                                 objective_param={"objective": "cross_entropy"},
                                 tree_param={
                                     "max_depth": 3
                                 },
                                 validation_freqs=1
                                 )

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(scale_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(homo_sbt_0, data=Data(train_data=scale_0.output.data))

    selection_param = {
        "name": "hetero_feature_selection_0",
        "select_col_indexes": -1,
        "select_names": [],
        "filter_methods": [
            "homo_sbt_filter"
        ],
        "sbt_param": {
            "metrics": "feature_importance",
            "filter_type": "threshold",
            "take_high": True,
            "threshold": 0.03
        }}
    feature_selection_0 = HeteroFeatureSelection(**selection_param)
    param = {
        "penalty": "L2",
        "optimizer": "sgd",
        "tol": 1e-05,
        "alpha": 0.01,
        "max_iter": 30,
        "early_stop": "diff",
        "batch_size": -1,
        "learning_rate": 0.15,
        "decay": 1,
        "decay_sqrt": True,
        "init_param": {
            "init_method": "zeros"
        },
        "encrypt_param": {
            "method": None
        },
        "cv_param": {
            "n_splits": 4,
            "shuffle": True,
            "random_seed": 33,
            "need_cv": False
        }
    }

    homo_lr_0 = HomoLR(name='homo_lr_0', **param)
    pipeline.add_component(feature_selection_0, data=Data(data=scale_0.output.data),
                           model=Model(isometric_model=homo_sbt_0.output.model))
    pipeline.add_component(homo_lr_0, data=Data(train_data=feature_selection_0.output.data))
    evaluation_0 = Evaluation(name='evaluation_0')
    pipeline.add_component(evaluation_0, data=Data(data=homo_lr_0.output.data))
    # compile pipeline once finished adding modules, this step will form conf and dsl files for running job
    pipeline.compile()
    return pipeline
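# Hedged usage sketch (not part of the original file): make_normal_dsl returns a
# compiled pipeline, so a caller only needs to fit it. The config path below
# reuses the default used elsewhere in this collection.
if __name__ == "__main__":
    job_config = load_job_config("../../config.yaml")
    pipeline = make_normal_dsl(job_config, namespace="")
    pipeline.fit()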