Example #1
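# Plain scikit-learn baseline: joins the guest and host CSVs on the index column,
# fits a GradientBoostingRegressor and reports mean_absolute_error on the training data.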
def main(config="../../config.yaml", param="./gbdt_config_reg.yaml"):

    # obtain config
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    data_guest = param["data_guest"]
    data_host = param["data_host"]
    idx = param["idx"]
    label_name = param["label_name"]

    print('config is {}'.format(config))
    if isinstance(config, str):
        config = JobConfig.load_from_file(config)
        data_base_dir = config["data_base_dir"]
        print('data base dir is', data_base_dir)
    else:
        data_base_dir = config.data_base_dir

    # prepare data
    df_guest = pd.read_csv(os.path.join(data_base_dir, data_guest),
                           index_col=idx)
    df_host = pd.read_csv(os.path.join(data_base_dir, data_host),
                          index_col=idx)
    df = df_guest.join(df_host, rsuffix='host')
    y = df[label_name]
    X = df.drop(label_name, axis=1)
    clf = GradientBoostingRegressor(random_state=0, n_estimators=50)
    clf.fit(X, y)

    y_predict = clf.predict(X)

    result = {"mean_absolute_error": mean_absolute_error(y, y_predict)}
    print(result)
    return {}, result
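The snippets in this listing are shown without their import blocks. As a rough sketch, this and the other local (non-pipeline) baselines below typically assume something like the following; the JobConfig import path follows FATE's pipeline.utils.tools helper and may differ between FATE versions, and the Keras/pipeline examples need further imports (tensorflow.keras layers/optimizers, PipeLine, Reader, DataIO, Intersection, Evaluation, Data, Model, JobParameters) not repeated here. Note also that newer scikit-learn releases renamed SGDClassifier's loss="log" to "log_loss" and SGDRegressor's loss="squared_loss" to "squared_error".

# minimal imports assumed by the local baselines (sketch; adjust to your environment)
import os
import numpy as np
import pandas
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor
from sklearn.linear_model import SGDClassifier, SGDRegressor
from sklearn.metrics import (accuracy_score, explained_variance_score,
                             mean_absolute_error, mean_squared_error,
                             precision_score, r2_score, recall_score,
                             roc_auc_score, roc_curve)
from pipeline.utils.tools import JobConfig  # assumed FATE helper; verify against your FATE version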
Example #2
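# Plain scikit-learn baseline: stacks guest and host rows, fits an SGDClassifier with
# logistic loss using hyper-parameters from the YAML config, and reports training accuracy.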
def main(config="../../config.yaml", param="./lr_config.yaml"):
    # obtain config
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)
    assert isinstance(param, dict)
    data_guest = param["data_guest"]
    data_host = param["data_host"]
    idx = param["idx"]
    label_name = param["label_name"]

    if isinstance(config, str):
        config = JobConfig.load_from_file(config)
        data_base_dir = config["data_base_dir"]
    else:
        data_base_dir = config.data_base_dir

    config_param = {
        "penalty": param["penalty"],
        "max_iter": 100,
        "alpha": param["alpha"],
        "learning_rate": "optimal",
        "eta0": param["learning_rate"]
    }

    # prepare data
    df_guest = pandas.read_csv(os.path.join(data_base_dir, data_guest),
                               index_col=idx)
    df_host = pandas.read_csv(os.path.join(data_base_dir, data_host),
                              index_col=idx)

    # df_test = pandas.read_csv(data_test, index_col=idx)

    df = pandas.concat([df_guest, df_host], axis=0)

    # df = df_guest.join(df_host, rsuffix="host")
    y_train = df[label_name]
    x_train = df.drop(label_name, axis=1)

    # y_test = df_test[label_name]
    # x_test = df_test.drop(label_name, axis=1)
    x_test, y_test = x_train, y_train

    # lm = LogisticRegression(max_iter=20)
    lm = SGDClassifier(loss="log", **config_param)
    lm_fit = lm.fit(x_train, y_train)
    y_pred = lm_fit.predict(x_test)

    acc = accuracy_score(y_test, y_pred)
    result = {"accuracy": acc}
    print('multi result', result)

    return {}, result
Example #3
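# Local Keras baseline for the hetero-NN benchmark: builds a two-input model over the
# guest (Xb) and host (Xa) features and reports AUC and/or accuracy per the configured metrics.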
def main(config="../../config.yaml", param="./hetero_nn_breast_config.yaml"):
    if isinstance(config, str):
        config = JobConfig.load_from_file(config)
        data_base_dir = config["data_base_dir"]
    else:
        data_base_dir = config.data_base_dir

    if isinstance(param, str):
        param = JobConfig.load_from_file(param)
    data_guest = param["data_guest"]
    data_host = param["data_host"]

    idx = param["idx"]
    label_name = param["label_name"]
    # prepare data
    Xb = pandas.read_csv(os.path.join(data_base_dir, data_guest),
                         index_col=idx)
    Xa = pandas.read_csv(os.path.join(data_base_dir, data_host), index_col=idx)
    y = Xb[label_name]
    if param["loss"] == "categorical_crossentropy":
        labels = y.copy()
        label_encoder = LabelEncoder()
        y = label_encoder.fit_transform(y)
        y = to_categorical(y)

    Xb = Xb.drop(label_name, axis=1)
    model = build(param, Xb.shape[1], Xa.shape[1])
    model.fit([Xb, Xa],
              y,
              epochs=param["epochs"],
              verbose=0,
              batch_size=param["batch_size"],
              shuffle=True)

    eval_result = {}
    for metric in param["metrics"]:
        if metric.lower() == "auc":
            predict_y = model.predict([Xb, Xa])
            auc = metrics.roc_auc_score(y, predict_y)
            eval_result["auc"] = auc
        elif metric == "accuracy":
            predict_y = np.argmax(model.predict([Xb, Xa]), axis=1)
            predict_y = label_encoder.inverse_transform(predict_y)
            acc = metrics.accuracy_score(y_true=labels, y_pred=predict_y)
            eval_result["accuracy"] = acc

    print(eval_result)
    data_summary = {}
    return data_summary, eval_result
Example #4
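# Local Keras Sequential baseline: layers, optimizer and loss come from the YAML config;
# trains on the concatenated guest/host data and reports training accuracy.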
def main(config="../../config.yaml", param="param_conf.yaml"):
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)
    if isinstance(config, str):
        config = JobConfig.load_from_file(config)
        data_base_dir = config["data_base_dir"]
    else:
        data_base_dir = config.data_base_dir

    epoch = param["epoch"]
    lr = param["lr"]
    batch_size = param.get("batch_size", -1)
    optimizer_name = param.get("optimizer", "Adam")
    loss = param.get("loss", "categorical_crossentropy")
    metrics = param.get("metrics", ["accuracy"])
    layers = param["layers"]
    is_multy = param["is_multy"]
    data = dataset[param.get("dataset", "vehicle")]

    model = Sequential()
    for layer_config in layers:
        layer = getattr(tensorflow.keras.layers, layer_config["name"])
        layer_params = layer_config["params"]
        model.add(layer(**layer_params))

    model.compile(
        optimizer=getattr(optimizers, optimizer_name)(learning_rate=lr),
        loss=loss,
        metrics=metrics,
    )

    data_path = pathlib.Path(data_base_dir)
    data_with_label = pandas.concat([
        pandas.read_csv(data_path.joinpath(data["guest"]), index_col=0),
        pandas.read_csv(data_path.joinpath(data["host"]), index_col=0),
    ]).values
    data = data_with_label[:, 1:]
    if is_multy:
        labels = to_categorical(data_with_label[:, 0])
    else:
        labels = data_with_label[:, 0]
    if batch_size < 0:
        batch_size = len(data_with_label)
    model.fit(data, labels, epochs=epoch, batch_size=batch_size)
    evaluate = model.evaluate(data, labels)
    metric_summary = {"accuracy": evaluate[1]}
    data_summary = {}
    return data_summary, metric_summary
Example #5
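# Plain scikit-learn baseline: fits a GradientBoostingClassifier on the stacked guest/host
# rows and reports accuracy on the guest partition.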
def main(param=""):
    # obtain config
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)
    data_guest = param["data_guest"]
    data_host = param["data_host"]
    data_test = param["data_test"]
    idx = param["idx"]
    label_name = param["label_name"]

    # prepare data
    df_guest = pd.read_csv(data_guest, index_col=idx)
    df_host = pd.read_csv(data_host, index_col=idx)
    df_test = pd.read_csv(data_test, index_col=idx)

    df = pd.concat([df_guest, df_host], axis=0)
    y = df[label_name]
    X = df.drop(label_name, axis=1)
    X_guest = df_guest.drop(label_name, axis=1)
    y_guest = df_guest[label_name]
    clf = GradientBoostingClassifier(
        n_estimators=50,
        learning_rate=0.3,
    )
    clf.fit(X, y)
    y_pred = clf.predict(X_guest)
    acc = accuracy_score(y_guest, y_pred)
    result = {"accuracy": acc}
    print(result)
    return {}, result
Example #6
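# Plain scikit-learn baseline: fits an SGDRegressor on the joined guest/host features and
# reports r2, MSE, RMSE and explained variance on the training data.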
def main(param="./linr_config.yaml"):
    # obtain config
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)
    data_guest = param["data_guest"]
    data_host = param["data_host"]

    idx = param["idx"]
    label_name = param["label_name"]
    # prepare data
    df_guest = pandas.read_csv(data_guest, index_col=idx)
    df_host = pandas.read_csv(data_host, index_col=idx)
    df = df_guest.join(df_host, rsuffix="host")
    y = df[label_name]
    X = df.drop(label_name, axis=1)
    lm = SGDRegressor(loss="squared_loss",
                      penalty=param["penalty"],
                      fit_intercept=True,
                      max_iter=param["max_iter"],
                      average=param["batch_size"])
    lm_fit = lm.fit(X, y)
    y_pred = lm_fit.predict(X)

    mse = mean_squared_error(y, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y, y_pred)
    explained_var = explained_variance_score(y, y_pred)
    metric_summary = {
        "r2_score": r2,
        "mean_squared_error": mse,
        "root_mean_squared_error": rmse,
        "explained_variance": explained_var
    }
    data_summary = {}
    return data_summary, metric_summary
Example #7
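# Plain scikit-learn baseline: fits a GradientBoostingClassifier on the joined guest/host
# features and reports AUC on the training data.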
def main(param=""):
    # obtain config
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)
    data_guest = param["data_guest"]
    data_host = param["data_host"]

    idx = param["idx"]
    label_name = param["label_name"]

    # prepare data
    df_guest = pd.read_csv(data_guest, index_col=idx)
    df_host = pd.read_csv(data_host, index_col=idx)
    df = df_guest.join(df_host, rsuffix='host')
    y = df[label_name]
    X = df.drop(label_name, axis=1)

    clf = GradientBoostingClassifier(
        random_state=0,
        n_estimators=120 if 'epsilon' in data_guest else 50,
        learning_rate=0.1)
    clf.fit(X, y)

    # positive-class probabilities give a meaningful AUC (hard class predictions would not)
    y_prob = clf.predict_proba(X)[:, 1]

    try:
        auc_score = roc_auc_score(y, y_prob)
    except Exception:
        print("no auc score available")
        return

    result = {"auc": auc_score}
    print(result)
    return {}, result
Example #8
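# Plain scikit-learn baseline: fits a GradientBoostingRegressor on the stacked guest/host
# rows and reports MSE and MAE on the guest partition.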
def main(param=""):
    # obtain config
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    data_guest = param["data_guest"]
    data_host = param["data_host"]
    data_test = param["data_test"]
    idx = param["idx"]
    label_name = param["label_name"]

    # prepare data
    df_guest = pd.read_csv(data_guest, index_col=idx)
    df_host = pd.read_csv(data_host, index_col=idx)
    df_test = pd.read_csv(data_test, index_col=idx)

    df = pd.concat([df_guest, df_host], axis=0)
    y = df[label_name]
    X = df.drop(label_name, axis=1)
    X_guest = df_guest.drop(label_name, axis=1)
    y_guest = df_guest[label_name]
    clf = GradientBoostingRegressor(n_estimators=50)
    clf.fit(X, y)
    y_predict = clf.predict(X_guest)

    result = {
        "mean_squared_error": mean_squared_error(y_guest, y_predict),
        "mean_absolute_error": mean_absolute_error(y_guest, y_predict)
    }
    print(result)
    return {}, result
Example #9
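# Plain scikit-learn baseline: fits a GradientBoostingRegressor on the joined guest/host
# features and reports MAE on the training data.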
def main(param=""):
    # obtain config
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)
    data_guest = param["data_guest"]
    data_host = param["data_host"]

    idx = param["idx"]
    label_name = param["label_name"]

    # prepare data
    df_guest = pd.read_csv(data_guest, index_col=idx)
    df_host = pd.read_csv(data_host, index_col=idx)
    df = df_guest.join(df_host, rsuffix='host')
    y = df[label_name]
    X = df.drop(label_name, axis=1)

    clf = GradientBoostingRegressor(random_state=0,
                                    n_estimators=50,
                                    learning_rate=0.1)
    clf.fit(X, y)

    y_predict = clf.predict(X)

    result = {
        "mean_absolute_error": mean_absolute_error(y, y_predict),
    }
    print(result)
    return {}, result
Example #10
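# Plain scikit-learn baseline: fits an SGDClassifier on the joined guest/host features;
# recall and precision are computed but only accuracy is returned.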
def main(config="../../config.yaml", param="./vechile_config.yaml"):
    # obtain config
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)
    assert isinstance(param, dict)
    data_guest = param["data_guest"]
    data_host = param["data_host"]

    idx = param["idx"]
    label_name = param["label_name"]

    if isinstance(config, str):
        config = JobConfig.load_from_file(config)
        data_base_dir = config["data_base_dir"]
    else:
        data_base_dir = config.data_base_dir

    config_param = {
        "penalty": param["penalty"],
        "max_iter": param["max_iter"],
        "alpha": param["alpha"],
        "learning_rate": "optimal",
        "eta0": param["learning_rate"],
        "random_state": 105
    }

    # prepare data
    df_guest = pandas.read_csv(os.path.join(data_base_dir, data_guest),
                               index_col=idx)
    df_host = pandas.read_csv(os.path.join(data_base_dir, data_host),
                              index_col=idx)

    df = df_guest.join(df_host, rsuffix="host")
    y = df[label_name]
    X = df.drop(label_name, axis=1)
    # lm = LogisticRegression(max_iter=20)
    lm = SGDClassifier(loss="log", **config_param, shuffle=False)
    lm_fit = lm.fit(X, y)
    y_pred = lm_fit.predict(X)

    recall = recall_score(y, y_pred, average="macro")
    pr = precision_score(y, y_pred, average="macro")
    acc = accuracy_score(y, y_pred)

    result = {"accuracy": acc}
    print(result)
    return {}, result
Example #11
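# Plain scikit-learn baseline: fits a GradientBoostingClassifier on the stacked guest/host
# rows and reports AUC on the guest partition.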
def main(config="../../config.yaml", param="./gbdt_config_binary.yaml"):
    # obtain config
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    data_guest = param["data_guest"]
    data_host = param["data_host"]
    idx = param["idx"]
    label_name = param["label_name"]

    print('config is {}'.format(config))
    if isinstance(config, str):
        config = JobConfig.load_from_file(config)
        data_base_dir = config["data_base_dir"]
        print('data base dir is', data_base_dir)
    else:
        data_base_dir = config.data_base_dir

    # prepare data
    df_guest = pd.read_csv(os.path.join(data_base_dir, data_guest),
                           index_col=idx)
    df_host = pd.read_csv(os.path.join(data_base_dir, data_host),
                          index_col=idx)
    df = pd.concat([df_guest, df_host], axis=0)
    y = df[label_name]
    X = df.drop(label_name, axis=1)
    X_guest = df_guest.drop(label_name, axis=1)
    y_guest = df_guest[label_name]
    clf = GradientBoostingClassifier(
        n_estimators=120 if 'epsilon' in data_guest else 50, learning_rate=0.1)
    clf.fit(X, y)
    # positive-class probabilities give a meaningful AUC (hard class predictions would not)
    y_prob = clf.predict_proba(X_guest)[:, 1]

    try:
        auc_score = roc_auc_score(y_guest, y_prob)
    except Exception:
        print("no auc score available")
        return

    result = {"auc": auc_score}
    import time
    print(result)
    print(data_guest)
    time.sleep(3)
    return {}, result
Example #12
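# Plain scikit-learn baseline for the multiclass benchmark: fits a GradientBoostingClassifier
# on the joined guest/host features and reports training accuracy.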
def main(config="../../config.yaml", param="./gbdt_config_multi.yaml"):
    # obtain config
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    data_guest = param["data_guest"]
    data_host = param["data_host"]
    idx = param["idx"]
    label_name = param["label_name"]

    print('config is {}'.format(config))
    if isinstance(config, str):
        config = JobConfig.load_from_file(config)
        data_base_dir = config["data_base_dir"]
        print('data base dir is', data_base_dir)
    else:
        data_base_dir = config.data_base_dir

    # prepare data
    df_guest = pd.read_csv(os.path.join(data_base_dir, data_guest),
                           index_col=idx)
    df_host = pd.read_csv(os.path.join(data_base_dir, data_host),
                          index_col=idx)
    df = df_guest.join(df_host, rsuffix='host')
    y = df[label_name]
    X = df.drop(label_name, axis=1)
    clf = GradientBoostingClassifier(random_state=0,
                                     n_estimators=50,
                                     learning_rate=0.3)
    clf.fit(X, y)
    y_pred = clf.predict(X)

    try:
        auc_score = roc_auc_score(y, y_pred)
    except Exception:
        print("no auc score available")

    acc = accuracy_score(y, y_pred)
    result = {"accuracy": acc}
    print('multi result', result)
    return {}, result
Example #13
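# Plain scikit-learn baseline for the multiclass benchmark: fits a GradientBoostingClassifier
# on the stacked guest/host rows and reports accuracy on the guest partition.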
def main(config="../../config.yaml", param="./gbdt_config_multi.yaml"):
    # obtain config
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    data_guest = param["data_guest"]
    data_host = param["data_host"]
    idx = param["idx"]
    label_name = param["label_name"]

    print('config is {}'.format(config))
    if isinstance(config, str):
        config = JobConfig.load_from_file(config)
        data_base_dir = config["data_base_dir"]
        print('data base dir is', data_base_dir)
    else:
        data_base_dir = config.data_base_dir

    # prepare data
    df_guest = pd.read_csv(os.path.join(data_base_dir, data_guest),
                           index_col=idx)
    df_host = pd.read_csv(os.path.join(data_base_dir, data_host),
                          index_col=idx)

    df = pd.concat([df_guest, df_host], axis=0)
    y = df[label_name]
    X = df.drop(label_name, axis=1)
    X_guest = df_guest.drop(label_name, axis=1)
    y_guest = df_guest[label_name]
    clf = GradientBoostingClassifier(
        n_estimators=50,
        learning_rate=0.3,
    )
    clf.fit(X, y)
    y_pred = clf.predict(X_guest)
    acc = accuracy_score(y_guest, y_pred)
    result = {"accuracy": acc}
    print(result)
    return {}, result
Example #14
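# Plain scikit-learn baseline: fits an SGDClassifier with logistic loss on the joined
# guest/host features and reports AUC, recall, precision, accuracy and KS.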
def main(param):
    # obtain config
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)
    assert isinstance(param, dict)
    data_guest = param["data_guest"]
    data_host = param["data_host"]
    idx = param["idx"]
    label_name = param["label_name"]


    config_param = {
        "penalty": param["penalty"],
        "max_iter": 100,
        "alpha": param["alpha"],
        "learning_rate": "optimal",
        "eta0": param["learning_rate"]
    }

    # prepare data
    df_guest = pandas.read_csv(data_guest, index_col=idx)
    df_host = pandas.read_csv(data_host, index_col=idx)
    df = df_guest.join(df_host, rsuffix="host")
    y = df[label_name]
    X = df.drop(label_name, axis=1)

    # x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)
    x_train, x_test, y_train, y_test = X, X, y, y

    # lm = LogisticRegression(max_iter=20)
    lm = SGDClassifier(loss="log", **config_param)
    lm_fit = lm.fit(x_train, y_train)
    y_pred = lm_fit.predict(x_test)
    y_prob = lm_fit.predict_proba(x_test)[:, 1]
    try:
        auc_score = roc_auc_score(y_test, y_prob)
    except Exception:
        print("no auc score available")
        return
    recall = recall_score(y_test, y_pred, average="macro")
    pr = precision_score(y_test, y_pred, average="macro")
    acc = accuracy_score(y_test, y_pred)
    # y_predict_proba = est.predict_proba(X_test)[:, 1]
    fpr, tpr, thresholds = roc_curve(y_test, y_prob)

    ks = max(tpr - fpr)
    result = {"auc": auc_score, "recall": recall, "precision": pr, "accuracy": acc, "ks": ks}
    print(result)
    print(f"coef_: {lm_fit.coef_}, intercept_: {lm_fit.intercept_}, n_iter: {lm_fit.n_iter_}")
    return {}, result
Example #15
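# Plain scikit-learn baseline: same pattern as Example #11, but reads the data paths
# directly from the param file (no config.yaml indirection).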
def main(param=""):
    # obtain config
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)
    data_guest = param["data_guest"]
    data_host = param["data_host"]
    data_test = param["data_test"]

    idx = param["idx"]
    label_name = param["label_name"]

    # prepare data
    df_guest = pd.read_csv(data_guest, index_col=idx)
    df_host = pd.read_csv(data_host, index_col=idx)
    df = pd.concat([df_guest, df_host], axis=0)
    y = df[label_name]
    X = df.drop(label_name, axis=1)
    X_guest = df_guest.drop(label_name, axis=1)
    y_guest = df_guest[label_name]
    clf = GradientBoostingClassifier(
        n_estimators=120 if 'epsilon' in data_guest else 50, learning_rate=0.1)
    clf.fit(X, y)
    # positive-class probabilities give a meaningful AUC (hard class predictions would not)
    y_prob = clf.predict_proba(X_guest)[:, 1]

    try:
        auc_score = roc_auc_score(y_guest, y_prob)
    except Exception:
        print("no auc score available")
        return

    result = {"auc": auc_score}
    import time
    print(result)
    print(data_guest)
    time.sleep(3)
    return {}, result
Example #16
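# FATE pipeline job: Reader -> DataIO -> Intersection -> HeteroLR -> Evaluation (binary);
# the training tables are chosen from a fixed list based on the configured guest data set.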
def main(config="../../config.yaml", param="./lr_config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]
    backend = config.backend
    work_mode = config.work_mode

    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    assert isinstance(param, dict)

    data_set = param.get("data_guest").split('/')[-1]
    if data_set == "default_credit_hetero_guest.csv":
        guest_data_table = 'default_credit_hetero_guest'
        host_data_table = 'default_credit_hetero_host'
    elif data_set == 'breast_hetero_guest.csv':
        guest_data_table = 'breast_hetero_guest'
        host_data_table = 'breast_hetero_host'
    elif data_set == 'give_credit_hetero_guest.csv':
        guest_data_table = 'give_credit_hetero_guest'
        host_data_table = 'give_credit_hetero_host'
    elif data_set == 'epsilon_5k_hetero_guest.csv':
        guest_data_table = 'epsilon_5k_hetero_guest'
        host_data_table = 'epsilon_5k_hetero_host'
    else:
        raise ValueError(f"Unrecognized data_set: {data_set}")

    guest_train_data = {
        "name": guest_data_table,
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": host_data_table,
        "namespace": f"experiment{namespace}"
    }

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    # configure Reader for guest
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    # configure Reader for host
    reader_0.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)

    # define DataIO components
    dataio_0 = DataIO(name="dataio_0")  # start component numbering at 0

    # get DataIO party instance of guest
    dataio_0_guest_party_instance = dataio_0.get_party_instance(role='guest',
                                                                party_id=guest)
    # configure DataIO for guest
    dataio_0_guest_party_instance.component_param(with_label=True,
                                                  output_format="dense")
    # get and configure DataIO party instance of host
    dataio_0.get_party_instance(
        role='host', party_id=host).component_param(with_label=False)

    # define Intersection component
    intersection_0 = Intersection(name="intersection_0")

    lr_param = {
        "validation_freqs": None,
        "early_stopping_rounds": None,
    }

    config_param = {
        "penalty": param["penalty"],
        "max_iter": param["max_iter"],
        "alpha": param["alpha"],
        "learning_rate": param["learning_rate"],
        "optimizer": param["optimizer"],
        "batch_size": param["batch_size"],
        "early_stop": "diff",
        "tol": 1e-5,
        "init_param": {
            "init_method": param.get("init_method", 'random_uniform')
        }
    }
    lr_param.update(config_param)
    print(f"lr_param: {lr_param}, data_set: {data_set}")
    hetero_lr_0 = HeteroLR(name='hetero_lr_0', **lr_param)

    evaluation_0 = Evaluation(name='evaluation_0', eval_type="binary")

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0,
                           data=Data(data=dataio_0.output.data))
    pipeline.add_component(hetero_lr_0,
                           data=Data(train_data=intersection_0.output.data))
    pipeline.add_component(evaluation_0,
                           data=Data(data=hetero_lr_0.output.data))

    # compile pipeline once finished adding modules, this step will form conf and dsl files for running job
    pipeline.compile()

    # fit model
    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)
    # query component summary
    print(pipeline.get_component("evaluation_0").get_summary())
    data_summary = {
        "train": {
            "guest": guest_train_data["name"],
            "host": host_train_data["name"]
        },
        "test": {
            "guest": guest_train_data["name"],
            "host": host_train_data["name"]
        }
    }
    result_summary = pipeline.get_component("evaluation_0").get_summary()
    return data_summary, result_summary
Example #17
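# Plain scikit-learn baseline: fits an SGDClassifier with logistic loss on the stacked
# guest/host rows; several metrics are computed but only AUC is returned.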
def main(config="../../config.yaml", param="./lr_config.yaml"):
    # obtain config
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)
    assert isinstance(param, dict)
    data_guest = param["data_guest"]
    data_host = param["data_host"]
    data_test = param["data_test"]
    idx = param["idx"]
    label_name = param["label_name"]

    if isinstance(config, str):
        config = JobConfig.load_from_file(config)
        data_base_dir = config["data_base_dir"]
    else:
        data_base_dir = config.data_base_dir

    config_param = {
        "penalty": param["penalty"],
        "max_iter": 100,
        "alpha": param["alpha"],
        "learning_rate": "optimal",
        "eta0": param["learning_rate"]
    }

    # prepare data
    df_guest = pandas.read_csv(os.path.join(data_base_dir, data_guest),
                               index_col=idx)
    df_host = pandas.read_csv(os.path.join(data_base_dir, data_host),
                              index_col=idx)

    # df_test = pandas.read_csv(data_test, index_col=idx)

    df = pandas.concat([df_guest, df_host], axis=0)

    # df = df_guest.join(df_host, rsuffix="host")
    y_train = df[label_name]
    x_train = df.drop(label_name, axis=1)

    # y_test = df_test[label_name]
    # x_test = df_test.drop(label_name, axis=1)
    x_test, y_test = x_train, y_train

    # lm = LogisticRegression(max_iter=20)
    lm = SGDClassifier(loss="log", **config_param)
    lm_fit = lm.fit(x_train, y_train)
    y_pred = lm_fit.predict(x_test)
    y_prob = lm_fit.predict_proba(x_test)[:, 1]
    auc_score = roc_auc_score(y_test, y_prob)
    recall = recall_score(y_test, y_pred, average="macro")
    pr = precision_score(y_test, y_pred, average="macro")
    acc = accuracy_score(y_test, y_pred)
    # y_predict_proba = est.predict_proba(X_test)[:, 1]
    fpr, tpr, thresholds = roc_curve(y_test, y_prob)
    ks = max(tpr - fpr)
    result = {"auc": auc_score}
    print(f"result: {result}")
    print(
        f"coef_: {lm_fit.coef_}, intercept_: {lm_fit.intercept_}, n_iter: {lm_fit.n_iter_}"
    )

    return {}, result
Example #18
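# FATE pipeline job: builds a HomoNN component from the configured Keras layers and
# evaluates it with a multiclass Evaluation component.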
def main(config="../../config.yaml", param="param_conf.yaml", namespace=""):
    num_host = 1

    if isinstance(config, str):
        config = load_job_config(config)

    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    epoch = param["epoch"]
    lr = param["lr"]
    batch_size = param.get("batch_size", -1)
    optimizer_name = param.get("optimizer", "Adam")
    encode_label = param.get("encode_label", True)
    loss = param.get("loss", "categorical_crossentropy")
    metrics = param.get("metrics", ["accuracy"])
    layers = param["layers"]
    data = getattr(dataset, param.get("dataset", "vehicle"))

    guest_train_data = data["guest"]
    host_train_data = data["host"][:num_host]
    for d in [guest_train_data, *host_train_data]:
        d["namespace"] = f"{d['namespace']}{namespace}"

    hosts = config.parties.host[:num_host]
    pipeline = PipeLine() \
        .set_initiator(role='guest', party_id=config.parties.guest[0]) \
        .set_roles(guest=config.parties.guest[0], host=hosts, arbiter=config.parties.arbiter)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(
        role='guest', party_id=config.parties.guest[0]).algorithm_param(
            table=guest_train_data)
    for i in range(num_host):
        reader_0.get_party_instance(role='host', party_id=hosts[i]) \
            .algorithm_param(table=host_train_data[i])

    dataio_0 = DataIO(name="dataio_0", with_label=True)
    dataio_0.get_party_instance(role='guest', party_id=config.parties.guest[0]) \
        .algorithm_param(with_label=True, output_format="dense")
    dataio_0.get_party_instance(
        role='host', party_id=hosts).algorithm_param(with_label=True)

    homo_nn_0 = HomoNN(name="homo_nn_0",
                       encode_label=encode_label,
                       max_iter=epoch,
                       batch_size=batch_size,
                       early_stop={
                           "early_stop": "diff",
                           "eps": 0.0
                       })
    for layer_config in layers:
        layer = getattr(tensorflow.keras.layers, layer_config["name"])
        layer_params = layer_config["params"]
        homo_nn_0.add(layer(**layer_params))
    # compile once, after all configured layers have been added
    homo_nn_0.compile(optimizer=getattr(optimizers,
                                        optimizer_name)(learning_rate=lr),
                      metrics=metrics,
                      loss=loss)

    evaluation_0 = Evaluation(name='evaluation_0',
                              eval_type="multi",
                              metrics=["accuracy", "precision", "recall"])

    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(homo_nn_0,
                           data=Data(train_data=dataio_0.output.data))
    pipeline.add_component(evaluation_0, data=Data(data=homo_nn_0.output.data))
    pipeline.compile()
    pipeline.fit(backend=config.backend, work_mode=config.work_mode)
    metric_summary = pipeline.get_component("evaluation_0").get_summary()
    data_summary = dict(train={
        "guest": guest_train_data["name"],
        **{f"host_{i}": host_train_data[i]["name"]
           for i in range(num_host)}
    },
                        test={
                            "guest": guest_train_data["name"],
                            **{
                                f"host_{i}": host_train_data[i]["name"]
                                for i in range(num_host)
                            }
                        })
    return data_summary, metric_summary
Example #19
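# FATE pipeline job: HeteroFastSecureBoost with separate train and validation readers,
# followed by an Evaluation component.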
def main(config="../../config.yaml",
         param="./xgb_config_binary.yaml",
         namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)

    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]

    backend = config.backend
    work_mode = config.work_mode

    # data sets
    guest_train_data = {
        "name": param['data_guest_train'],
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": param['data_host_train'],
        "namespace": f"experiment{namespace}"
    }
    guest_validate_data = {
        "name": param['data_guest_val'],
        "namespace": f"experiment{namespace}"
    }
    host_validate_data = {
        "name": param['data_host_val'],
        "namespace": f"experiment{namespace}"
    }

    # init pipeline
    pipeline = PipeLine().set_initiator(role="guest",
                                        party_id=guest).set_roles(
                                            guest=guest,
                                            host=host,
                                        )

    # set data reader and data-io

    reader_0, reader_1 = Reader(name="reader_0"), Reader(name="reader_1")
    reader_0.get_party_instance(
        role="guest", party_id=guest).algorithm_param(table=guest_train_data)
    reader_0.get_party_instance(
        role="host", party_id=host).algorithm_param(table=host_train_data)
    reader_1.get_party_instance(
        role="guest",
        party_id=guest).algorithm_param(table=guest_validate_data)
    reader_1.get_party_instance(
        role="host", party_id=host).algorithm_param(table=host_validate_data)

    dataio_0, dataio_1 = DataIO(name="dataio_0"), DataIO(name="dataio_1")

    dataio_0.get_party_instance(role="guest", party_id=guest).algorithm_param(
        with_label=True, output_format="dense")
    dataio_0.get_party_instance(
        role="host", party_id=host).algorithm_param(with_label=False)
    dataio_1.get_party_instance(role="guest", party_id=guest).algorithm_param(
        with_label=True, output_format="dense")
    dataio_1.get_party_instance(
        role="host", party_id=host).algorithm_param(with_label=False)

    # data intersect component
    intersect_0 = Intersection(name="intersection_0")
    intersect_1 = Intersection(name="intersection_1")

    # secure boost component
    hetero_fast_sbt_0 = HeteroFastSecureBoost(
        name="hetero_fast_sbt_0",
        num_trees=param['tree_num'],
        task_type=param['task_type'],
        objective_param={"objective": param['loss_func']},
        encrypt_param={"method": "iterativeAffine"},
        tree_param={"max_depth": param['tree_depth']},
        validation_freqs=1,
        subsample_feature_rate=1,
        learning_rate=param['learning_rate'],
        guest_depth=param['guest_depth'],
        host_depth=param['host_depth'],
        tree_num_per_party=param['tree_num_per_party'],
        work_mode=param['work_mode'])

    # evaluation component
    evaluation_0 = Evaluation(name="evaluation_0",
                              eval_type=param['eval_type'])

    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(dataio_1,
                           data=Data(data=reader_1.output.data),
                           model=Model(dataio_0.output.model))
    pipeline.add_component(intersect_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(intersect_1, data=Data(data=dataio_1.output.data))
    pipeline.add_component(hetero_fast_sbt_0,
                           data=Data(train_data=intersect_0.output.data,
                                     validate_data=intersect_1.output.data))
    pipeline.add_component(evaluation_0,
                           data=Data(data=hetero_fast_sbt_0.output.data))

    pipeline.compile()
    pipeline.fit(backend=backend, work_mode=work_mode)

    return {}, pipeline.get_component("evaluation_0").get_summary()
Example #20
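# FATE pipeline job: HeteroFastSecureBoost train and predict components; post-processes
# the component outputs into regression or classification distribution metrics.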
def main(config="../../config.yaml", param="./xgb_config_binary.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)

    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]

    backend = config.backend
    work_mode = config.work_mode

    # data sets
    guest_train_data = {"name": param['data_guest_train'], "namespace": f"experiment{namespace}"}
    host_train_data = {"name": param['data_host_train'], "namespace": f"experiment{namespace}"}
    guest_validate_data = {"name": param['data_guest_val'], "namespace": f"experiment{namespace}"}
    host_validate_data = {"name": param['data_host_val'], "namespace": f"experiment{namespace}"}

    # init pipeline
    pipeline = PipeLine().set_initiator(role="guest", party_id=guest).set_roles(guest=guest, host=host,)

    # set data reader and data-io

    reader_0, reader_1 = Reader(name="reader_0"), Reader(name="reader_1")
    reader_0.get_party_instance(role="guest", party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role="host", party_id=host).component_param(table=host_train_data)
    reader_1.get_party_instance(role="guest", party_id=guest).component_param(table=guest_validate_data)
    reader_1.get_party_instance(role="host", party_id=host).component_param(table=host_validate_data)

    dataio_0, dataio_1 = DataIO(name="dataio_0"), DataIO(name="dataio_1")

    dataio_0.get_party_instance(role="guest", party_id=guest).component_param(with_label=True, output_format="dense")
    dataio_0.get_party_instance(role="host", party_id=host).component_param(with_label=False)
    dataio_1.get_party_instance(role="guest", party_id=guest).component_param(with_label=True, output_format="dense")
    dataio_1.get_party_instance(role="host", party_id=host).component_param(with_label=False)

    # data intersect component
    intersect_0 = Intersection(name="intersection_0")
    intersect_1 = Intersection(name="intersection_1")

    # secure boost component
    hetero_fast_sbt_0 = HeteroFastSecureBoost(name="hetero_fast_sbt_0",
                                              num_trees=param['tree_num'],
                                              task_type=param['task_type'],
                                              objective_param={"objective": param['loss_func']},
                                              encrypt_param={"method": "iterativeAffine"},
                                              tree_param={"max_depth": param['tree_depth']},
                                              validation_freqs=1,
                                              subsample_feature_rate=1,
                                              learning_rate=param['learning_rate'],
                                              guest_depth=param['guest_depth'],
                                              host_depth=param['host_depth'],
                                              tree_num_per_party=param['tree_num_per_party'],
                                              work_mode=param['work_mode']
                                              )
    hetero_fast_sbt_1 = HeteroFastSecureBoost(name="hetero_fast_sbt_1")
    # evaluation component
    evaluation_0 = Evaluation(name="evaluation_0", eval_type=param['eval_type'])

    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(dataio_1, data=Data(data=reader_1.output.data), model=Model(dataio_0.output.model))
    pipeline.add_component(intersect_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(intersect_1, data=Data(data=dataio_1.output.data))
    pipeline.add_component(hetero_fast_sbt_0, data=Data(train_data=intersect_0.output.data,
                                                        validate_data=intersect_1.output.data))
    pipeline.add_component(hetero_fast_sbt_1, data=Data(test_data=intersect_1.output.data),
                           model=Model(hetero_fast_sbt_0.output.model))
    pipeline.add_component(evaluation_0, data=Data(data=hetero_fast_sbt_0.output.data))

    pipeline.compile()
    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)

    sbt_0_data = pipeline.get_component("hetero_fast_sbt_0").get_output_data().get("data")
    sbt_1_data = pipeline.get_component("hetero_fast_sbt_1").get_output_data().get("data")
    sbt_0_score = extract_data(sbt_0_data, "predict_result")
    sbt_0_label = extract_data(sbt_0_data, "label")
    sbt_1_score = extract_data(sbt_1_data, "predict_result")
    sbt_1_label = extract_data(sbt_1_data, "label")
    sbt_0_score_label = extract_data(sbt_0_data, "predict_result", keep_id=True)
    sbt_1_score_label = extract_data(sbt_1_data, "predict_result", keep_id=True)
    metric_summary = parse_summary_result(pipeline.get_component("evaluation_0").get_summary())
    if param['eval_type'] == "regression":
        desc_sbt_0 = regression_metric.Describe().compute(sbt_0_score)
        desc_sbt_1 = regression_metric.Describe().compute(sbt_1_score)
        metric_summary["script_metrics"] = {"hetero_fast_sbt_train": desc_sbt_0,
                                            "hetero_fast_sbt_validate": desc_sbt_1}
    elif param['eval_type'] == "binary":
        metric_sbt = {
            "score_diversity_ratio": classification_metric.Distribution.compute(sbt_0_score_label, sbt_1_score_label),
            "ks_2samp": classification_metric.KSTest.compute(sbt_0_score, sbt_1_score),
            "mAP_D_value": classification_metric.AveragePrecisionScore().compute(sbt_0_score, sbt_1_score, sbt_0_label,
                                                                                 sbt_1_label)}
        metric_summary["distribution_metrics"] = {"hetero_fast_sbt": metric_sbt}
    elif param['eval_type'] == "multi":
        metric_sbt = {
            "score_diversity_ratio": classification_metric.Distribution.compute(sbt_0_score_label, sbt_1_score_label)}
        metric_summary["distribution_metrics"] = {"hetero_fast_sbt": metric_sbt}

    data_summary = {"train": {"guest": guest_train_data["name"], "host": host_train_data["name"]},
                    "test": {"guest": guest_train_data["name"], "host": host_train_data["name"]}
                    }

    return data_summary, metric_summary
Example #21
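# FATE pipeline job: HomoSecureBoost train and predict components; post-processes the
# component outputs into regression or classification distribution metrics.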
def main(config="../../config.yaml",
         param='./xgb_config_binary.yaml',
         namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)

    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    guest_train_data = {
        "name": param['data_guest_train'],
        "namespace": f"experiment{namespace}"
    }
    guest_validate_data = {
        "name": param['data_guest_val'],
        "namespace": f"experiment{namespace}"
    }

    host_train_data = {
        "name": param['data_host_train'],
        "namespace": f"experiment{namespace}"
    }
    host_validate_data = {
        "name": param['data_host_val'],
        "namespace": f"experiment{namespace}"
    }

    pipeline = PipeLine().set_initiator(
        role='guest', party_id=guest).set_roles(guest=guest,
                                                host=host,
                                                arbiter=arbiter)

    dataio_0, dataio_1 = DataIO(name="dataio_0"), DataIO(name='dataio_1')
    reader_0, reader_1 = Reader(name="reader_0"), Reader(name='reader_1')

    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)
    dataio_0.get_party_instance(role='guest', party_id=guest).component_param(
        with_label=True, output_format="dense")
    dataio_0.get_party_instance(role='host', party_id=host).component_param(
        with_label=True, output_format="dense")

    reader_1.get_party_instance(
        role='guest',
        party_id=guest).component_param(table=guest_validate_data)
    reader_1.get_party_instance(
        role='host', party_id=host).component_param(table=host_validate_data)
    dataio_1.get_party_instance(role='guest', party_id=guest).component_param(
        with_label=True, output_format="dense")
    dataio_1.get_party_instance(role='host', party_id=host).component_param(
        with_label=True, output_format="dense")

    homo_secureboost_0 = HomoSecureBoost(
        name="homo_secureboost_0",
        num_trees=param['tree_num'],
        task_type=param['task_type'],
        objective_param={"objective": param['loss_func']},
        tree_param={"max_depth": param['tree_depth']},
        validation_freqs=1,
        subsample_feature_rate=1,
        learning_rate=param['learning_rate'],
        bin_num=50)
    homo_secureboost_1 = HomoSecureBoost(name="homo_secureboost_1")

    evaluation_0 = Evaluation(name='evaluation_0',
                              eval_type=param['eval_type'])

    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(reader_1)
    pipeline.add_component(dataio_1,
                           data=Data(data=reader_1.output.data),
                           model=Model(dataio_0.output.model))
    pipeline.add_component(homo_secureboost_0,
                           data=Data(train_data=dataio_0.output.data,
                                     validate_data=dataio_1.output.data))
    pipeline.add_component(homo_secureboost_1,
                           data=Data(test_data=dataio_1.output.data),
                           model=Model(homo_secureboost_0.output.model))
    pipeline.add_component(evaluation_0,
                           data=Data(homo_secureboost_0.output.data))

    pipeline.compile()
    pipeline.fit()

    sbt_0_data = pipeline.get_component(
        "homo_secureboost_0").get_output_data().get("data")
    sbt_1_data = pipeline.get_component(
        "homo_secureboost_1").get_output_data().get("data")
    sbt_0_score = extract_data(sbt_0_data, "predict_result")
    sbt_0_label = extract_data(sbt_0_data, "label")
    sbt_1_score = extract_data(sbt_1_data, "predict_result")
    sbt_1_label = extract_data(sbt_1_data, "label")
    sbt_0_score_label = extract_data(sbt_0_data,
                                     "predict_result",
                                     keep_id=True)
    sbt_1_score_label = extract_data(sbt_1_data,
                                     "predict_result",
                                     keep_id=True)
    metric_summary = parse_summary_result(
        pipeline.get_component("evaluation_0").get_summary())
    if param['eval_type'] == "regression":
        desc_sbt_0 = regression_metric.Describe().compute(sbt_0_score)
        desc_sbt_1 = regression_metric.Describe().compute(sbt_1_score)
        metric_summary["script_metrics"] = {
            "sbt_train": desc_sbt_0,
            "sbt_validate": desc_sbt_1
        }
    elif param['eval_type'] == "binary":
        metric_sbt = {
            "score_diversity_ratio":
            classification_metric.Distribution.compute(sbt_0_score_label,
                                                       sbt_1_score_label),
            "ks_2samp":
            classification_metric.KSTest.compute(sbt_0_score, sbt_1_score),
            "mAP_D_value":
            classification_metric.AveragePrecisionScore().compute(
                sbt_0_score, sbt_1_score, sbt_0_label, sbt_1_label)
        }
        metric_summary["distribution_metrics"] = {"homo_sbt": metric_sbt}
    elif param['eval_type'] == "multi":
        metric_sbt = {
            "score_diversity_ratio":
            classification_metric.Distribution.compute(sbt_0_score_label,
                                                       sbt_1_score_label)
        }
        metric_summary["distribution_metrics"] = {"homo_sbt": metric_sbt}

    data_summary = {
        "train": {
            "guest": guest_train_data["name"],
            "host": host_train_data["name"]
        },
        "test": {
            "guest": guest_validate_data["name"],
            "host": host_validate_data["name"]
        }
    }

    return data_summary, metric_summary
Example #22
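# FATE pipeline job: Reader -> DataIO -> Intersection -> HeteroLinR -> Evaluation
# (regression metrics) on the motor hetero data set.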
def main(config="../../config.yaml", param="./linr_config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]
    backend = config.backend
    work_mode = config.work_mode

    if isinstance(param, str):
        param = JobConfig.load_from_file(param)
    """
    guest = 9999
    host = 10000
    arbiter = 9999
    backend = 0
    work_mode = 1
    param = {"penalty": "L2", "max_iter": 5}
    """

    guest_train_data = {
        "name": "motor_hetero_guest",
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": "motor_hetero_host",
        "namespace": f"experiment{namespace}"
    }

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    # configure Reader for guest
    reader_0.get_party_instance(
        role='guest', party_id=guest).algorithm_param(table=guest_train_data)
    # configure Reader for host
    reader_0.get_party_instance(
        role='host', party_id=host).algorithm_param(table=host_train_data)

    # define DataIO components
    dataio_0 = DataIO(name="dataio_0")  # start component numbering at 0

    # get DataIO party instance of guest
    dataio_0_guest_party_instance = dataio_0.get_party_instance(role='guest',
                                                                party_id=guest)
    # configure DataIO for guest
    dataio_0_guest_party_instance.algorithm_param(
        with_label=True,
        output_format="dense",
        label_name=param["label_name"],
        label_type="float")
    # get and configure DataIO party instance of host
    dataio_0.get_party_instance(
        role='host', party_id=host).algorithm_param(with_label=False)

    # define Intersection component
    intersection_0 = Intersection(name="intersection_0")

    param = {
        "penalty": param["penalty"],
        "validation_freqs": None,
        "early_stopping_rounds": None,
        "max_iter": param["max_iter"],
        "optimizer": param["optimizer"],
        "learning_rate": param["learning_rate"],
        "init_param": param["init_param"],
        "batch_size": param["batch_size"],
        "alpha": param["alpha"]
    }

    hetero_linr_0 = HeteroLinR(name='hetero_linr_0', **param)

    evaluation_0 = Evaluation(name='evaluation_0',
                              eval_type="regression",
                              metrics=[
                                  "r2_score", "mean_squared_error",
                                  "root_mean_squared_error",
                                  "explained_variance"
                              ])

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0,
                           data=Data(data=dataio_0.output.data))
    pipeline.add_component(hetero_linr_0,
                           data=Data(train_data=intersection_0.output.data))
    pipeline.add_component(evaluation_0,
                           data=Data(data=hetero_linr_0.output.data))

    # compile pipeline once finished adding modules, this step will form conf and dsl files for running job
    pipeline.compile()

    # fit model
    pipeline.fit(backend=backend, work_mode=work_mode)

    metric_summary = pipeline.get_component("evaluation_0").get_summary()
    data_summary = {
        "train": {
            "guest": guest_train_data["name"],
            "host": host_train_data["name"]
        },
        "test": {
            "guest": guest_train_data["name"],
            "host": host_train_data["name"]
        }
    }
    return data_summary, metric_summary
Example #23
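# FATE pipeline job: HomoNN train and predict components built from the configured Keras
# layers, with distribution metrics computed from the component outputs.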
def main(config="../../config.yaml", param="param_conf.yaml", namespace=""):
    num_host = 1

    if isinstance(config, str):
        config = load_job_config(config)

    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    epoch = param["epoch"]
    lr = param["lr"]
    batch_size = param.get("batch_size", -1)
    optimizer_name = param.get("optimizer", "Adam")
    encode_label = param.get("encode_label", True)
    loss = param.get("loss", "categorical_crossentropy")
    metrics = param.get("metrics", ["accuracy"])
    layers = param["layers"]
    data = getattr(dataset, param.get("dataset", "vehicle"))

    guest_train_data = data["guest"]
    host_train_data = data["host"][:num_host]
    for d in [guest_train_data, *host_train_data]:
        d["namespace"] = f"{d['namespace']}{namespace}"

    hosts = config.parties.host[:num_host]
    pipeline = PipeLine() \
        .set_initiator(role='guest', party_id=config.parties.guest[0]) \
        .set_roles(guest=config.parties.guest[0], host=hosts, arbiter=config.parties.arbiter)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=config.parties.guest[0]).component_param(table=guest_train_data)
    for i in range(num_host):
        reader_0.get_party_instance(role='host', party_id=hosts[i]) \
            .component_param(table=host_train_data[i])

    dataio_0 = DataIO(name="dataio_0", with_label=True)
    dataio_0.get_party_instance(role='guest', party_id=config.parties.guest[0]) \
        .component_param(with_label=True, output_format="dense")
    dataio_0.get_party_instance(role='host', party_id=hosts).component_param(with_label=True)

    homo_nn_0 = HomoNN(name="homo_nn_0", encode_label=encode_label, max_iter=epoch, batch_size=batch_size,
                       early_stop={"early_stop": "diff", "eps": 0.0})
    for layer_config in layers:
        layer = getattr(tensorflow.keras.layers, layer_config["name"])
        layer_params = layer_config["params"]
        homo_nn_0.add(layer(**layer_params))
    # compile once, after all configured layers have been added
    homo_nn_0.compile(optimizer=getattr(optimizers, optimizer_name)(learning_rate=lr), metrics=metrics,
                      loss=loss)
    homo_nn_1 = HomoNN(name="homo_nn_1")
    if param["loss"] == "categorical_crossentropy":
        eval_type = "multi"
    else:
        eval_type = "binary"
    evaluation_0 = Evaluation(name='evaluation_0', eval_type="multi", metrics=["accuracy", "precision", "recall"])

    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(homo_nn_0, data=Data(train_data=dataio_0.output.data))
    pipeline.add_component(homo_nn_1, data=Data(test_data=dataio_0.output.data),
                           model=Model(homo_nn_0.output.model))
    pipeline.add_component(evaluation_0, data=Data(data=homo_nn_0.output.data))
    pipeline.compile()
    job_parameters = JobParameters(backend=config.backend, work_mode=config.work_mode)
    pipeline.fit(job_parameters)
    metric_summary = parse_summary_result(pipeline.get_component("evaluation_0").get_summary())
    nn_0_data = pipeline.get_component("homo_nn_0").get_output_data().get("data")
    nn_1_data = pipeline.get_component("homo_nn_1").get_output_data().get("data")
    nn_0_score = extract_data(nn_0_data, "predict_result")
    nn_0_label = extract_data(nn_0_data, "label")
    nn_1_score = extract_data(nn_1_data, "predict_result")
    nn_1_label = extract_data(nn_1_data, "label")
    nn_0_score_label = extract_data(nn_0_data, "predict_result", keep_id=True)
    nn_1_score_label = extract_data(nn_1_data, "predict_result", keep_id=True)
    if eval_type == "binary":
        metric_nn = {
            "score_diversity_ratio": classification_metric.Distribution.compute(nn_0_score_label, nn_1_score_label),
            "ks_2samp": classification_metric.KSTest.compute(nn_0_score, nn_1_score),
            "mAP_D_value": classification_metric.AveragePrecisionScore().compute(nn_0_score, nn_1_score, nn_0_label,
                                                                                 nn_1_label)}
        metric_summary["distribution_metrics"] = {"homo_nn": metric_nn}
    elif eval_type == "multi":
        metric_nn = {
            "score_diversity_ratio": classification_metric.Distribution.compute(nn_0_score_label, nn_1_score_label)}
        metric_summary["distribution_metrics"] = {"homo_nn": metric_nn}

    data_summary = dict(
        train={"guest": guest_train_data["name"], **{f"host_{i}": host_train_data[i]["name"] for i in range(num_host)}},
        test={"guest": guest_train_data["name"], **{f"host_{i}": host_train_data[i]["name"] for i in range(num_host)}}
    )
    return data_summary, metric_summary
Example #24
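# FATE pipeline job: HeteroNN with bottom, interactive and top Dense layers defined from
# the YAML config, followed by an Evaluation component.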
def main(config="../../config.yaml",
         param="./hetero_nn_breast_config.yaml",
         namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)

    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    backend = config.backend
    work_mode = config.work_mode

    guest_train_data = {
        "name": param["guest_table_name"],
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": param["host_table_name"],
        "namespace": f"experiment{namespace}"
    }

    pipeline = PipeLine().set_initiator(role='guest',
                                        party_id=guest).set_roles(guest=guest,
                                                                  host=host)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)

    dataio_0 = DataIO(name="dataio_0")
    dataio_0.get_party_instance(
        role='guest', party_id=guest).component_param(with_label=True)
    dataio_0.get_party_instance(
        role='host', party_id=host).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")

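    # hetero NN structure: bottom model(s) feed an interactive layer, whose output
    # goes to the top model on the label-holding (guest) side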
    hetero_nn_0 = HeteroNN(name="hetero_nn_0",
                           epochs=param["epochs"],
                           interactive_layer_lr=param["learning_rate"],
                           batch_size=param["batch_size"],
                           early_stop="diff")
    hetero_nn_0.add_bottom_model(
        Dense(units=param["bottom_layer_units"],
              input_shape=(10, ),
              activation="tanh",
              kernel_initializer=initializers.RandomUniform(minval=-1,
                                                            maxval=1,
                                                            seed=123)))
    hetero_nn_0.set_interactve_layer(
        Dense(units=param["interactive_layer_units"],
              input_shape=(param["bottom_layer_units"], ),
              activation="relu",
              kernel_initializer=initializers.RandomUniform(minval=-1,
                                                            maxval=1,
                                                            seed=123)))
    hetero_nn_0.add_top_model(
        Dense(units=param["top_layer_units"],
              input_shape=(param["interactive_layer_units"], ),
              activation=param["top_act"],
              kernel_initializer=initializers.RandomUniform(minval=-1,
                                                            maxval=1,
                                                            seed=123)))
    opt = getattr(optimizers, param["opt"])(lr=param["learning_rate"])
    hetero_nn_0.compile(optimizer=opt,
                        metrics=param["metrics"],
                        loss=param["loss"])

    if param["loss"] == "categorical_crossentropy":
        eval_type = "multi"
    else:
        eval_type = "binary"

    evaluation_0 = Evaluation(name="evaluation_0", eval_type=eval_type)

    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0,
                           data=Data(data=dataio_0.output.data))
    pipeline.add_component(hetero_nn_0,
                           data=Data(train_data=intersection_0.output.data))
    pipeline.add_component(evaluation_0,
                           data=Data(data=hetero_nn_0.output.data))

    pipeline.compile()

    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)

    data_summary = {
        "train": {
            "guest": guest_train_data["name"],
            "host": host_train_data["name"]
        },
        "test": {
            "guest": guest_train_data["name"],
            "host": host_train_data["name"]
        }
    }
    return data_summary, pipeline.get_component("evaluation_0").get_summary()
Example #25
0
def main(config="../../config.yaml",
         param='./xgb_config_binary.yaml',
         namespace=""):

    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)

    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    backend = config.backend
    work_mode = config.work_mode

    guest_train_data = {
        "name": param['data_guest_train'],
        "namespace": f"experiment{namespace}"
    }
    guest_validate_data = {
        "name": param['data_guest_val'],
        "namespace": f"experiment{namespace}"
    }

    host_train_data = {
        "name": param['data_host_train'],
        "namespace": f"experiment{namespace}"
    }
    host_validate_data = {
        "name": param['data_host_val'],
        "namespace": f"experiment{namespace}"
    }

    pipeline = PipeLine().set_initiator(
        role='guest', party_id=guest).set_roles(guest=guest,
                                                host=host,
                                                arbiter=arbiter)

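    # two Reader/DataIO pairs: *_0 for training data, *_1 for validation data;
    # dataio_1 reuses dataio_0's model (see add_component below) so both are parsed consistently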
    dataio_0, dataio_1 = DataIO(name="dataio_0"), DataIO(name='dataio_1')
    reader_0, reader_1 = Reader(name="reader_0"), Reader(name='reader_1')

    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)
    dataio_0.get_party_instance(role='guest', party_id=guest).component_param(
        with_label=True, output_format="dense")
    dataio_0.get_party_instance(role='host', party_id=host).component_param(
        with_label=True, output_format="dense")

    reader_1.get_party_instance(
        role='guest',
        party_id=guest).component_param(table=guest_validate_data)
    reader_1.get_party_instance(
        role='host', party_id=host).component_param(table=host_validate_data)
    dataio_1.get_party_instance(role='guest', party_id=guest).component_param(
        with_label=True, output_format="dense")
    dataio_1.get_party_instance(role='host', party_id=host).component_param(
        with_label=True, output_format="dense")

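    # homo SecureBoost: guest and host both hold labels (with_label=True above) and jointly
    # fit boosting trees; the validation set is evaluated every round (validation_freqs=1)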
    homo_secureboost_0 = HomoSecureBoost(
        name="homo_secureboost_0",
        num_trees=param['tree_num'],
        task_type=param['task_type'],
        objective_param={"objective": param['loss_func']},
        tree_param={"max_depth": param['tree_depth']},
        validation_freqs=1,
        subsample_feature_rate=1,
        learning_rate=param['learning_rate'],
        bin_num=50)

    evaluation_0 = Evaluation(name='evaluation_0',
                              eval_type=param['eval_type'])

    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(reader_1)
    pipeline.add_component(dataio_1,
                           data=Data(data=reader_1.output.data),
                           model=Model(dataio_0.output.model))
    pipeline.add_component(homo_secureboost_0,
                           data=Data(train_data=dataio_0.output.data,
                                     validate_data=dataio_1.output.data))
    pipeline.add_component(evaluation_0,
                           data=Data(data=homo_secureboost_0.output.data))

    pipeline.compile()
    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)

    data_summary = {
        "train": {
            "guest": guest_train_data["name"],
            "host": host_train_data["name"]
        },
        "test": {
            "guest": guest_train_data["name"],
            "host": host_train_data["name"]
        }
    }

    return data_summary, pipeline.get_component('evaluation_0').get_summary()
Example #26
0
def main(config="../../config.yaml", param="./lr_config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    assert isinstance(param, dict)

    data_set = param.get("data_guest").split('/')[-1]
    if data_set == "default_credit_hetero_guest.csv":
        guest_data_table = 'default_credit_hetero_guest'
        host_data_table = 'default_credit_hetero_host'
    elif data_set == 'breast_hetero_guest.csv':
        guest_data_table = 'breast_hetero_guest'
        host_data_table = 'breast_hetero_host'
    elif data_set == 'give_credit_hetero_guest.csv':
        guest_data_table = 'give_credit_hetero_guest'
        host_data_table = 'give_credit_hetero_host'
    elif data_set == 'epsilon_5k_hetero_guest.csv':
        guest_data_table = 'epsilon_5k_hetero_guest'
        host_data_table = 'epsilon_5k_hetero_host'
    else:
        raise ValueError(f"Cannot recognized data_set: {data_set}")

    guest_train_data = {"name": guest_data_table, "namespace": f"experiment{namespace}"}
    host_train_data = {"name": host_data_table, "namespace": f"experiment{namespace}"}

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    # configure Reader for guest
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    # configure Reader for host
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    # define DataTransform components
    data_transform_0 = DataTransform(name="data_transform_0")  # start component numbering at 0

    # get DataTransform party instance of guest
    data_transform_0_guest_party_instance = data_transform_0.get_party_instance(role='guest', party_id=guest)
    # configure DataTransform for guest
    data_transform_0_guest_party_instance.component_param(with_label=True, output_format="dense")
    # get and configure DataTransform party instance of host
    data_transform_0.get_party_instance(role='host', party_id=host).component_param(with_label=False)

    # define Intersection component
    intersection_0 = Intersection(name="intersection_0")

    lr_param = {}

    config_param = {
        "penalty": param["penalty"],
        "max_iter": param["max_iter"],
        "alpha": param["alpha"],
        "learning_rate": param["learning_rate"],
        "optimizer": param["optimizer"],
        "batch_size": param["batch_size"],
        "shuffle": False,
        "masked_rate": 0,
        "early_stop": "diff",
        "tol": 1e-5,
        "floating_point_precision": param.get("floating_point_precision"),
        "init_param": {
            "init_method": param.get("init_method", 'random_uniform'),
            "random_seed": param.get("random_seed", 103)
        }
    }
    lr_param.update(config_param)
    print(f"lr_param: {lr_param}, data_set: {data_set}")
    hetero_lr_0 = HeteroLR(name='hetero_lr_0', **lr_param)
    hetero_lr_1 = HeteroLR(name='hetero_lr_1')

    evaluation_0 = Evaluation(name='evaluation_0', eval_type="binary")

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(hetero_lr_0, data=Data(train_data=intersection_0.output.data))
    pipeline.add_component(hetero_lr_1, data=Data(test_data=intersection_0.output.data),
                           model=Model(hetero_lr_0.output.model))
    pipeline.add_component(evaluation_0, data=Data(data=hetero_lr_0.output.data))

    # compile pipeline once finished adding modules; this step forms the conf and dsl files for running the job
    pipeline.compile()

    # fit model
    job_parameters = JobParameters()
    pipeline.fit(job_parameters)
    lr_0_data = pipeline.get_component("hetero_lr_0").get_output_data().get("data")
    lr_1_data = pipeline.get_component("hetero_lr_1").get_output_data().get("data")
    lr_0_score = extract_data(lr_0_data, "predict_result")
    lr_0_label = extract_data(lr_0_data, "label")
    lr_1_score = extract_data(lr_1_data, "predict_result")
    lr_1_label = extract_data(lr_1_data, "label")
    lr_0_score_label = extract_data(lr_0_data, "predict_result", keep_id=True)
    lr_1_score_label = extract_data(lr_1_data, "predict_result", keep_id=True)
    result_summary = parse_summary_result(pipeline.get_component("evaluation_0").get_summary())
    metric_lr = {
        "score_diversity_ratio": classification_metric.Distribution.compute(lr_0_score_label, lr_1_score_label),
        "ks_2samp": classification_metric.KSTest.compute(lr_0_score, lr_1_score),
        "mAP_D_value": classification_metric.AveragePrecisionScore().compute(lr_0_score, lr_1_score, lr_0_label,
                                                                             lr_1_label)}
    result_summary["distribution_metrics"] = {"hetero_lr": metric_lr}

    data_summary = {"train": {"guest": guest_train_data["name"], "host": host_train_data["name"]},
                    "test": {"guest": guest_train_data["name"], "host": host_train_data["name"]}
                    }

    return data_summary, result_summary
Example #27
0
def main(config="../../config.yaml",
         param="./sshe_linr_config.yaml",
         namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    guest_train_data = {
        "name": "motor_hetero_guest",
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": "motor_hetero_host",
        "namespace": f"experiment{namespace}"
    }

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    # configure Reader for guest
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    # configure Reader for host
    reader_0.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)

    # define DataTransform components
    data_transform_0 = DataTransform(
        name="data_transform_0")  # start component numbering at 0

    # get DataTransform party instance of guest
    data_transform_0_guest_party_instance = data_transform_0.get_party_instance(
        role='guest', party_id=guest)
    # configure DataTransform for guest
    data_transform_0_guest_party_instance.component_param(
        with_label=True,
        output_format="dense",
        label_name=param["label_name"],
        label_type="float")
    # get and configure DataTransform party instance of host
    data_transform_0.get_party_instance(
        role='host', party_id=host).component_param(with_label=False)

    # define Intersection component
    intersection_0 = Intersection(name="intersection_0")

    linr_param = {
        "penalty": param["penalty"],
        "max_iter": param["max_iter"],
        "optimizer": param["optimizer"],
        "learning_rate": param["learning_rate"],
        "init_param": param["init_param"],
        "batch_size": param["batch_size"],
        "alpha": param["alpha"],
        "early_stop": param["early_stop"],
        "reveal_strategy": param["reveal_strategy"],
        "tol": 1e-6,
        "reveal_every_iter": True
    }

    hetero_sshe_linr_0 = HeteroSSHELinR(name='hetero_sshe_linr_0', **linr_param)
    hetero_sshe_linr_1 = HeteroSSHELinR(name='hetero_sshe_linr_1')

    evaluation_0 = Evaluation(name='evaluation_0',
                              eval_type="regression",
                              metrics=[
                                  "r2_score", "mean_squared_error",
                                  "root_mean_squared_error",
                                  "explained_variance"
                              ])

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0,
                           data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0,
                           data=Data(data=data_transform_0.output.data))
    pipeline.add_component(hetero_sshe_linr_0,
                           data=Data(train_data=intersection_0.output.data))
    pipeline.add_component(hetero_sshe_linr_1,
                           data=Data(test_data=intersection_0.output.data),
                           model=Model(hetero_sshe_linr_0.output.model))
    pipeline.add_component(evaluation_0,
                           data=Data(data=hetero_sshe_linr_0.output.data))

    # compile pipeline once finished adding modules; this step forms the conf and dsl files for running the job
    pipeline.compile()

    # fit model
    pipeline.fit()

    metric_summary = parse_summary_result(
        pipeline.get_component("evaluation_0").get_summary())

    data_linr_0 = extract_data(
        pipeline.get_component("hetero_sshe_linr_0").get_output_data().get(
            "data"), "predict_result")
    data_linr_1 = extract_data(
        pipeline.get_component("hetero_sshe_linr_1").get_output_data().get(
            "data"), "predict_result")
    desc_linr_0 = regression_metric.Describe().compute(data_linr_0)
    desc_linr_1 = regression_metric.Describe().compute(data_linr_1)

    metric_summary["script_metrics"] = {
        "linr_train": desc_linr_0,
        "linr_validate": desc_linr_1
    }

    data_summary = {
        "train": {
            "guest": guest_train_data["name"],
            "host": host_train_data["name"]
        },
        "test": {
            "guest": guest_train_data["name"],
            "host": host_train_data["name"]
        }
    }
    return data_summary, metric_summary
Example #28
0
def main(config="../../config.yaml",
         param="./vechile_config.yaml",
         namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]
    backend = config.backend
    work_mode = config.work_mode

    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    assert isinstance(param, dict)
    """
    guest = 9999
    host = 10000
    arbiter = 9999
    backend = 0
    work_mode = 1
    param = {"penalty": "L2", "max_iter": 5}
    """
    data_set = param.get("data_guest").split('/')[-1]
    if data_set == "vehicle_scale_hetero_guest.csv":
        guest_data_table = 'vehicle_scale_hetero_guest'
        host_data_table = 'vehicle_scale_hetero_host'
    else:
        raise ValueError(f"Cannot recognized data_set: {data_set}")

    guest_train_data = {
        "name": guest_data_table,
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": host_data_table,
        "namespace": f"experiment{namespace}"
    }

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    # configure Reader for guest
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    # configure Reader for host
    reader_0.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)

    # define DataIO components
    dataio_0 = DataIO(name="dataio_0")  # start component numbering at 0

    # get DataIO party instance of guest
    dataio_0_guest_party_instance = dataio_0.get_party_instance(role='guest',
                                                                party_id=guest)
    # configure DataIO for guest
    dataio_0_guest_party_instance.component_param(with_label=True,
                                                  output_format="dense")
    # get and configure DataIO party instance of host
    dataio_0.get_party_instance(
        role='host', party_id=host).component_param(with_label=False)

    # define Intersection component
    intersection_0 = Intersection(name="intersection_0")

    lr_param = {
        "validation_freqs": None,
        "early_stopping_rounds": None,
    }

    config_param = {
        "penalty": param["penalty"],
        "max_iter": param["max_iter"],
        "alpha": param["alpha"],
        "learning_rate": param["learning_rate"],
        "optimizer": param["optimizer"],
        "batch_size": param["batch_size"],
        "early_stop": "diff",
        "init_param": {
            "init_method": param.get("init_method", 'random_uniform'),
            "random_seed": param.get("random_seed", 103)
        }
    }
    lr_param.update(config_param)
    print(f"lr_param: {lr_param}, data_set: {data_set}")
    hetero_lr_0 = HeteroLR(name='hetero_lr_0', **lr_param)
    hetero_lr_1 = HeteroLR(name='hetero_lr_1')

    evaluation_0 = Evaluation(name='evaluation_0', eval_type="multi")

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0,
                           data=Data(data=dataio_0.output.data))
    pipeline.add_component(hetero_lr_0,
                           data=Data(train_data=intersection_0.output.data))
    pipeline.add_component(hetero_lr_1,
                           data=Data(test_data=intersection_0.output.data),
                           model=Model(hetero_lr_0.output.model))
    pipeline.add_component(evaluation_0,
                           data=Data(data=hetero_lr_0.output.data))

    # compile pipeline once finished adding modules; this step forms the conf and dsl files for running the job
    pipeline.compile()

    # fit model
    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)
    # query component summary

    result_summary = parse_summary_result(
        pipeline.get_component("evaluation_0").get_summary())
    lr_0_data = pipeline.get_component("hetero_lr_0").get_output_data().get(
        "data")
    lr_1_data = pipeline.get_component("hetero_lr_1").get_output_data().get(
        "data")
    lr_0_score_label = extract_data(lr_0_data, "predict_result", keep_id=True)
    lr_1_score_label = extract_data(lr_1_data, "predict_result", keep_id=True)
    metric_lr = {
        "score_diversity_ratio":
        classification_metric.Distribution.compute(lr_0_score_label,
                                                   lr_1_score_label)
    }
    result_summary["distribution_metrics"] = {"hetero_lr": metric_lr}

    data_summary = {
        "train": {
            "guest": guest_train_data["name"],
            "host": host_train_data["name"]
        },
        "test": {
            "guest": guest_train_data["name"],
            "host": host_train_data["name"]
        }
    }
    return data_summary, result_summary
Example #29
0
def main(config="../../config.yaml", param="./hetero_nn_breast_config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)

    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]

    guest_train_data = {"name": param["guest_table_name"], "namespace": f"experiment{namespace}"}
    host_train_data = {"name": param["host_table_name"], "namespace": f"experiment{namespace}"}

    pipeline = PipeLine().set_initiator(role='guest', party_id=guest).set_roles(guest=guest, host=host)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    dataio_0 = DataIO(name="dataio_0")
    dataio_0.get_party_instance(role='guest', party_id=guest).component_param(with_label=True)
    dataio_0.get_party_instance(role='host', party_id=host).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")

    hetero_nn_0 = HeteroNN(name="hetero_nn_0", epochs=param["epochs"],
                           interactive_layer_lr=param["learning_rate"], batch_size=param["batch_size"],
                           early_stop="diff")
    hetero_nn_0.add_bottom_model(Dense(units=param["bottom_layer_units"], input_shape=(10,), activation="tanh",
                                       kernel_initializer=initializers.RandomUniform(minval=-1, maxval=1, seed=123)))
    hetero_nn_0.set_interactve_layer(
        Dense(units=param["interactive_layer_units"], input_shape=(param["bottom_layer_units"],), activation="relu",
              kernel_initializer=initializers.RandomUniform(minval=-1, maxval=1, seed=123)))
    hetero_nn_0.add_top_model(
        Dense(units=param["top_layer_units"], input_shape=(param["interactive_layer_units"],),
              activation=param["top_act"],
              kernel_initializer=initializers.RandomUniform(minval=-1, maxval=1, seed=123)))
    opt = getattr(optimizers, param["opt"])(lr=param["learning_rate"])
    hetero_nn_0.compile(optimizer=opt, metrics=param["metrics"],
                        loss=param["loss"])
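    # hetero_nn_1 runs prediction with the model trained by hetero_nn_0 (wired up via Model(...) below)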
    hetero_nn_1 = HeteroNN(name="hetero_nn_1")

    if param["loss"] == "categorical_crossentropy":
        eval_type = "multi"
    else:
        eval_type = "binary"

    evaluation_0 = Evaluation(name="evaluation_0", eval_type=eval_type)

    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(hetero_nn_0, data=Data(train_data=intersection_0.output.data))
    pipeline.add_component(hetero_nn_1, data=Data(test_data=intersection_0.output.data),
                           model=Model(hetero_nn_0.output.model))
    pipeline.add_component(evaluation_0, data=Data(data=hetero_nn_0.output.data))

    pipeline.compile()

    pipeline.fit()

    nn_0_data = pipeline.get_component("hetero_nn_0").get_output_data().get("data")
    nn_1_data = pipeline.get_component("hetero_nn_1").get_output_data().get("data")
    nn_0_score = extract_data(nn_0_data, "predict_result")
    nn_0_label = extract_data(nn_0_data, "label")
    nn_1_score = extract_data(nn_1_data, "predict_result")
    nn_1_label = extract_data(nn_1_data, "label")
    nn_0_score_label = extract_data(nn_0_data, "predict_result", keep_id=True)
    nn_1_score_label = extract_data(nn_1_data, "predict_result", keep_id=True)
    metric_summary = parse_summary_result(pipeline.get_component("evaluation_0").get_summary())
    if eval_type == "binary":
        metric_nn = {
            "score_diversity_ratio": classification_metric.Distribution.compute(nn_0_score_label, nn_1_score_label),
            "ks_2samp": classification_metric.KSTest.compute(nn_0_score, nn_1_score),
            "mAP_D_value": classification_metric.AveragePrecisionScore().compute(nn_0_score, nn_1_score, nn_0_label,
                                                                                 nn_1_label)}
        metric_summary["distribution_metrics"] = {"hetero_nn": metric_nn}
    elif eval_type == "multi":
        metric_nn = {
            "score_diversity_ratio": classification_metric.Distribution.compute(nn_0_score_label, nn_1_score_label)}
        metric_summary["distribution_metrics"] = {"hetero_nn": metric_nn}

    data_summary = {"train": {"guest": guest_train_data["name"], "host": host_train_data["name"]},
                    "test": {"guest": guest_train_data["name"], "host": host_train_data["name"]}
                    }
    return data_summary, metric_summary
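The examples above all expose the same main(config, param) signature; below is a minimal sketch of how such a script could be invoked standalone. The flag names and defaults are assumptions for illustration, not part of the original examples.

# hypothetical standalone entrypoint (assumed invocation pattern, not from the original script)
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser("PIPELINE JOB")
    parser.add_argument("-config", type=str, default="../../config.yaml",
                        help="path to the site configuration yaml")
    parser.add_argument("-param", type=str, default="./hetero_nn_breast_config.yaml",
                        help="path to the job parameter yaml")
    args = parser.parse_args()
    # run the pipeline defined in main() with the supplied configuration files
    main(config=args.config, param=args.param)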