Exemple #1
0
def main(config="../../config.yaml", namespace=""):
    """Fit a feature-selection pipeline that uses only the "manually" filter.

    Args:
        config: Either a string, which is handed to ``load_job_config``, or
            an already-loaded config object exposing ``backend`` and
            ``work_mode``.
        namespace: Forwarded unchanged to ``common_tools.make_normal_dsl``.
    """
    # Only strings need loading; any other object is used as the config.
    job_config = load_job_config(config) if isinstance(config, str) else config
    engine = job_config.backend
    mode = job_config.work_mode

    # Settings for the "manually" filter.
    manual_filter = {
        "filter_out_indexes": None,
        "filter_out_names": None,
        "left_col_indexes": [0, 1, 2],
        "left_col_names": ["x3"],
    }
    selection_settings = {
        "name": "hetero_feature_selection_0",
        "select_col_indexes": -1,
        "select_names": [],
        "filter_methods": ["manually"],
        "manually_param": manual_filter,
    }

    pipeline = common_tools.make_normal_dsl(
        job_config, namespace, selection_settings)
    pipeline.fit(JobParameters(backend=engine, work_mode=mode))

    # Print the summary of the fitted selection component.
    selection = pipeline.get_component("hetero_feature_selection_0")
    common_tools.prettify(selection.get_summary())
Exemple #2
0
def main(config="../../config.yaml", namespace=""):
    """Fit the single-predict pipeline with binning and feature selection.

    Args:
        config: Either a string, which is handed to ``load_job_config``, or
            an already-loaded config object exposing ``backend`` and
            ``work_mode``.
        namespace: Forwarded unchanged to
            ``common_tools.make_single_predict_pipeline``.
    """
    # Only strings need loading; any other object is used as the config.
    job_config = load_job_config(config) if isinstance(config, str) else config
    engine = job_config.backend
    mode = job_config.work_mode

    # Binning component settings.
    transform_settings = {
        "transform_cols": -1,
        "transform_names": None,
        "transform_type": "bin_num",
    }
    binning_settings = {
        "name": "hetero_feature_binning_0",
        "method": "quantile",
        "compress_thres": 10000,
        "head_size": 10000,
        "error": 0.001,
        "bin_num": 10,
        "bin_indexes": -1,
        "bin_names": None,
        "category_indexes": None,
        "category_names": None,
        "adjustment_factor": 0.5,
        "local_only": False,
        "transform_param": transform_settings,
    }

    # Selection component settings; three filters are enabled, while
    # parameters are supplied for six.
    selection_settings = {
        "name": "hetero_feature_selection_0",
        "select_col_indexes": -1,
        "select_names": [],
        "filter_methods": ["manually", "iv_value_thres", "iv_percentile"],
        "manually_param": {"filter_out_indexes": [], "filter_out_names": []},
        "unique_param": {"eps": 1e-06},
        "iv_value_param": {"value_threshold": 0.1},
        "iv_percentile_param": {"percentile_threshold": 0.9},
        "variance_coe_param": {"value_threshold": 0.3},
        "outlier_param": {"percentile": 0.95, "upper_threshold": 2.0},
    }

    pipeline = common_tools.make_single_predict_pipeline(
        job_config,
        namespace,
        selection_settings,
        binning_param=binning_settings,
    )
    pipeline.fit(JobParameters(backend=engine, work_mode=mode))

    # Print the summary of the fitted selection component.
    selection = pipeline.get_component("hetero_feature_selection_0")
    common_tools.prettify(selection.get_summary())
Exemple #3
0
def main(config="../../config.yaml", namespace=""):
    """Build the pipeline with ``make_normal_dsl``, fit it, print summaries.

    Args:
        config: Either a string, which is handed to ``load_job_config``, or
            an already-loaded config object.
        namespace: Forwarded unchanged to ``make_normal_dsl``.
    """
    # Only strings need loading; any other object is used as the config.
    job_config = load_job_config(config) if isinstance(config, str) else config

    pipeline = make_normal_dsl(job_config, namespace)
    pipeline.fit()

    # Print the selection summary first, then the evaluation summary.
    for component_name in ("hetero_feature_selection_0", "evaluation_0"):
        component = pipeline.get_component(component_name)
        common_tools.prettify(component.get_summary())
def main(config="../../config.yaml", namespace=""):
    """Build the pipeline with ``make_normal_dsl``, fit it, print summaries.

    The fit call receives a ``JobParameters`` object built from the config's
    ``backend`` and ``work_mode``.

    Args:
        config: Either a string, which is handed to ``load_job_config``, or
            an already-loaded config object exposing ``backend`` and
            ``work_mode``.
        namespace: Forwarded unchanged to ``make_normal_dsl``.
    """
    # Only strings need loading; any other object is used as the config.
    job_config = load_job_config(config) if isinstance(config, str) else config
    engine = job_config.backend
    mode = job_config.work_mode

    pipeline = make_normal_dsl(job_config, namespace)
    pipeline.fit(JobParameters(backend=engine, work_mode=mode))

    # Print the selection summary first, then the evaluation summary.
    for component_name in ("hetero_feature_selection_0", "evaluation_0"):
        component = pipeline.get_component(component_name)
        common_tools.prettify(component.get_summary())
Exemple #5
0
def main(config="../../config.yaml", namespace=""):
    """Fit a selection pipeline that uses the "hetero_fast_sbt_filter" method.

    Args:
        config: Either a string, which is handed to ``load_job_config``, or
            an already-loaded config object exposing ``backend`` and
            ``work_mode``.
        namespace: Forwarded unchanged to ``common_tools.make_normal_dsl``.
    """
    # Only strings need loading; any other object is used as the config.
    job_config = load_job_config(config) if isinstance(config, str) else config
    engine = job_config.backend
    mode = job_config.work_mode

    # Settings passed to make_normal_dsl as ``fast_sbt_param``.
    fast_sbt_settings = {
        "name": "fast_secureboost_0",
        "task_type": "classification",
        "learning_rate": 0.1,
        "num_trees": 4,
        "subsample_feature_rate": 1,
        "n_iter_no_change": False,
        "work_mode": "layered",
        "guest_depth": 2,
        "host_depth": 3,
        "tol": 0.0001,
        "bin_num": 50,
        "metrics": ["Recall", "ks", "auc", "roc"],
        "objective_param": {"objective": "cross_entropy"},
        "encrypt_param": {"method": "iterativeAffine"},
        "predict_param": {"threshold": 0.5},
        "validation_freqs": 1,
    }

    # Selection settings: one filter, configured through "sbt_param".
    sbt_filter = {
        "metrics": "feature_importance",
        "filter_type": "threshold",
        "take_high": True,
        "threshold": 0.03,
    }
    selection_settings = {
        "name": "hetero_feature_selection_0",
        "select_col_indexes": -1,
        "select_names": [],
        "filter_methods": ["hetero_fast_sbt_filter"],
        "sbt_param": sbt_filter,
    }

    pipeline = common_tools.make_normal_dsl(
        job_config,
        namespace,
        selection_settings,
        fast_sbt_param=fast_sbt_settings,
    )
    # This example passes backend and work_mode to fit() as keywords.
    pipeline.fit(backend=engine, work_mode=mode)

    # Print the summary of the fitted selection component.
    selection = pipeline.get_component("hetero_feature_selection_0")
    common_tools.prettify(selection.get_summary())
Exemple #6
0
def main(config="../../config.yaml", namespace=""):
    """Fit a binning + selection pipeline using IV filters with local_only flags.

    Args:
        config: Either a string, which is handed to ``load_job_config``, or
            an already-loaded config object exposing ``backend`` and
            ``work_mode``.
        namespace: Forwarded unchanged to ``common_tools.make_normal_dsl``.
    """
    # Only strings need loading; any other object is used as the config.
    job_config = load_job_config(config) if isinstance(config, str) else config
    engine = job_config.backend
    mode = job_config.work_mode

    # Binning component settings.
    transform_settings = {
        "transform_cols": -1,
        "transform_names": None,
        "transform_type": "bin_num",
    }
    binning_settings = {
        "name": "hetero_feature_binning_0",
        "method": "quantile",
        "compress_thres": 10000,
        "head_size": 10000,
        "error": 0.001,
        "bin_num": 10,
        "bin_indexes": -1,
        "bin_names": None,
        "category_indexes": None,
        "category_names": None,
        "adjustment_factor": 0.5,
        "local_only": False,
        "transform_param": transform_settings,
    }

    # Selection component settings.
    # NOTE(review): "iv_top_k_param" is supplied while filter_methods lists
    # "iv_filter" -- confirm this pairing is intended.
    selection_settings = {
        "name": "hetero_feature_selection_0",
        "select_col_indexes": -1,
        "select_names": [],
        "filter_methods": ["iv_value_thres", "iv_filter"],
        "iv_value_param": {"value_threshold": 1, "local_only": True},
        "iv_top_k_param": {"k": 7, "local_only": False},
    }

    pipeline = common_tools.make_normal_dsl(
        job_config,
        namespace,
        selection_settings,
        binning_param=binning_settings,
    )
    pipeline.fit(JobParameters(backend=engine, work_mode=mode))

    # Print the summary of the fitted selection component.
    selection = pipeline.get_component("hetero_feature_selection_0")
    common_tools.prettify(selection.get_summary())
Exemple #7
0
def main(config="../../config.yaml", namespace=""):
    """Fit a selection pipeline that uses only the "percentage_value" filter.

    Args:
        config: Either a string, which is handed to ``load_job_config``, or
            an already-loaded config object.
        namespace: Forwarded unchanged to ``common_tools.make_normal_dsl``.
    """
    # Only strings need loading; any other object is used as the config.
    job_config = load_job_config(config) if isinstance(config, str) else config

    # Selection settings: one filter, configured through
    # "percentage_value_param".
    percentage_filter = {"upper_pct": 0.8}
    selection_settings = {
        "name": "hetero_feature_selection_0",
        "select_col_indexes": -1,
        "select_names": [],
        "filter_methods": ["percentage_value"],
        "percentage_value_param": percentage_filter,
    }

    pipeline = common_tools.make_normal_dsl(
        job_config, namespace, selection_settings)
    pipeline.fit()

    # Print the summary of the fitted selection component.
    selection = pipeline.get_component("hetero_feature_selection_0")
    common_tools.prettify(selection.get_summary())
Exemple #8
0
def main(config="../../config.yaml", namespace=""):
    """Fit a pipeline with binning, statistics and six selection filters.

    Args:
        config: Either a string, which is handed to ``load_job_config``, or
            an already-loaded config object.
        namespace: Forwarded unchanged to ``common_tools.make_normal_dsl``.
    """
    # Only strings need loading; any other object is used as the config.
    job_config = load_job_config(config) if isinstance(config, str) else config

    # Binning component settings.
    transform_settings = {
        "transform_cols": -1,
        "transform_names": None,
        "transform_type": "bin_num",
    }
    binning_settings = {
        "name": "hetero_feature_binning_0",
        "method": "quantile",
        "compress_thres": 10000,
        "head_size": 10000,
        "error": 0.001,
        "bin_num": 10,
        "bin_indexes": -1,
        "bin_names": None,
        "category_indexes": None,
        "category_names": None,
        "adjustment_factor": 0.5,
        "local_only": False,
        "transform_param": transform_settings,
    }

    # Statistics component settings.
    statistic_settings = {
        "name": "statistic_0",
        "statistics": ["95%", "coefficient_of_variance", "stddev"],
        "column_indexes": -1,
        "column_names": [],
    }

    # Selection component settings: the enabled filters, in order.
    enabled_filters = [
        "manually",
        "unique_value",
        "iv_value_thres",
        "coefficient_of_variation_value_thres",
        "iv_percentile",
        "outlier_cols",
    ]
    selection_settings = {
        "name": "hetero_feature_selection_0",
        "select_col_indexes": -1,
        "select_names": [],
        "filter_methods": enabled_filters,
        "manually_param": {"filter_out_indexes": [], "filter_out_names": []},
        "unique_param": {"eps": 1e-06},
        "iv_value_param": {"value_threshold": 0.1},
        "iv_percentile_param": {"percentile_threshold": 0.9},
        "variance_coe_param": {"value_threshold": 0.3},
        "outlier_param": {"percentile": 0.95, "upper_threshold": 2.0},
    }

    pipeline = common_tools.make_normal_dsl(
        job_config,
        namespace,
        selection_settings,
        binning_param=binning_settings,
        statistic_param=statistic_settings,
    )
    pipeline.fit()

    # Print the summary of the fitted selection component.
    selection = pipeline.get_component("hetero_feature_selection_0")
    common_tools.prettify(selection.get_summary())
Exemple #9
0
def main(config="../../config.yaml", namespace=""):
    """Fit a selection pipeline driven by IV, statistic, PSI and SBT filters.

    Binning, statistic, PSI and secureboost settings are built here and
    passed to ``common_tools.make_normal_dsl`` alongside the selection
    settings.

    Args:
        config: Either a string, which is handed to ``load_job_config``, or
            an already-loaded config object exposing ``backend`` and
            ``work_mode``.
        namespace: Forwarded unchanged to ``common_tools.make_normal_dsl``.
    """
    # Only strings need loading; any other object is used as the config.
    job_config = load_job_config(config) if isinstance(config, str) else config
    engine = job_config.backend
    mode = job_config.work_mode

    # Binning component settings.
    transform_settings = {
        "transform_cols": -1,
        "transform_names": None,
        "transform_type": "bin_num",
    }
    binning_settings = {
        "name": "hetero_feature_binning_0",
        "method": "quantile",
        "compress_thres": 10000,
        "head_size": 10000,
        "error": 0.001,
        "bin_num": 10,
        "bin_indexes": -1,
        "bin_names": None,
        "category_indexes": None,
        "category_names": None,
        "adjustment_factor": 0.5,
        "local_only": False,
        "transform_param": transform_settings,
    }

    # Statistics and PSI component settings.
    statistic_settings = {
        "name": "statistic_0",
        "statistics": ["95%", "coefficient_of_variance", "stddev"],
        "column_indexes": -1,
        "column_names": [],
    }
    psi_settings = {"name": "psi_0", "max_bin_num": 20}

    # Settings passed to make_normal_dsl as ``sbt_param``.
    secureboost_settings = {
        "name": "secureboost_0",
        "task_type": "classification",
        "learning_rate": 0.1,
        "num_trees": 5,
        "subsample_feature_rate": 1,
        "n_iter_no_change": False,
        "tol": 0.0001,
        "bin_num": 50,
        "objective_param": {"objective": "cross_entropy"},
        "encrypt_param": {"method": "paillier"},
        "predict_param": {"threshold": 0.5},
        "validation_freqs": 1,
    }

    # Per-filter settings for the selection component.
    iv_filter = {
        "metrics": ["iv", "iv", "iv"],
        "filter_type": ["threshold", "top_k", "top_percentile"],
        "take_high": True,
        "threshold": [0.03, 15, 0.7],
        "host_thresholds": [[0.15], None, None],
        "select_federated": True,
    }
    statistic_filter = {
        "metrics": ["skewness", "skewness", "kurtosis", "median"],
        "filter_type": "threshold",
        "take_high": [True, False, True, True],
        "threshold": [-10, 10, -1.5, -1.5],
    }
    psi_filter = {
        "metrics": "psi",
        "filter_type": "threshold",
        "take_high": False,
        "threshold": -0.1,
    }
    sbt_filter = {
        "metrics": "feature_importance",
        "filter_type": "threshold",
        "take_high": True,
        "threshold": 0.03,
    }
    selection_settings = {
        "name": "hetero_feature_selection_0",
        "select_col_indexes": -1,
        "select_names": [],
        "filter_methods": [
            "iv_filter",
            "statistic_filter",
            "psi_filter",
            "hetero_sbt_filter",
        ],
        "iv_param": iv_filter,
        "statistic_param": statistic_filter,
        "psi_param": psi_filter,
        "sbt_param": sbt_filter,
    }

    pipeline = common_tools.make_normal_dsl(
        job_config,
        namespace,
        selection_settings,
        binning_param=binning_settings,
        statistic_param=statistic_settings,
        psi_param=psi_settings,
        sbt_param=secureboost_settings,
    )
    # This example passes backend and work_mode to fit() as keywords.
    pipeline.fit(backend=engine, work_mode=mode)

    # Print the summary of the fitted selection component.
    selection = pipeline.get_component("hetero_feature_selection_0")
    common_tools.prettify(selection.get_summary())