Example #1

import pandas as pd
from sklearn.model_selection import ShuffleSplit

from hyperflow.estimator.base import HyperFlowEstimator
from hyperflow.hdl.hdl_constructor import HDL_Constructor
from hyperflow.tuner.tuner import Tuner

df = pd.read_csv("../examples/classification/train_classification.csv")
ss = ShuffleSplit(n_splits=1, random_state=0, test_size=0.25)
train_ix, test_ix = next(ss.split(df))
df_train = df.iloc[train_ix, :]
df_test = df.iloc[test_ix, :]

tuner = Tuner(
    initial_runs=5,
    run_limit=12,
)
hdl_constructor = HDL_Constructor(
    DAG_descriptions={
        "nan->imp": "impute.fill_abnormal",
        "imp->{cat_name=cat,num_name=num}": "operate.split.cat_num",
        "cat->num": "encode.cat_boost|scale.standardize",
        "num->target": "reduce.pca|lightgbm"
    })
hyperflow_pipeline = HyperFlowEstimator(tuner, hdl_constructor)
column_descriptions = {
    "id": "PassengerId",
    "target": "Survived",
    "ignore": "Name"
}

hyperflow_pipeline.fit(X=df_train,
                       X_test=df_test,
                       column_descriptions=column_descriptions,
                       n_jobs=1)
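
A fitted pipeline can then be queried for predictions. This is a minimal sketch assuming HyperFlowEstimator exposes a sklearn-style predict(); that method is not shown in the snippet above.

# Hypothetical usage, assuming a sklearn-style predict() on the fitted estimator
predictions = hyperflow_pipeline.predict(df_test)
print(predictions[:10])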
Example #2
import pandas as pd
from sklearn.model_selection import ShuffleSplit

from hyperflow.estimator.base import HyperFlowEstimator
from hyperflow.tuner.tuner import Tuner

df = pd.read_csv("../examples/classification/train_classification.csv")
ss = ShuffleSplit(n_splits=1, random_state=0, test_size=0.25)
train_ix, test_ix = next(ss.split(df))
df_train = df.iloc[train_ix, :]
df_test = df.iloc[test_ix, :]

tuner = Tuner(
    initial_runs=1,
    run_limit=100,
    n_jobs=1,
    search_method_params={"anneal_func": "lambda x:1*(1/(-(3*(x-1))))"}
)
hyperflow_pipeline = HyperFlowEstimator(tuner)
column_descriptions = {
    "id": "PassengerId",
    "target": "Survived",
    "ignore": "Name"
}
hyperflow_pipeline.fit(
    X=df_train, X_test=df_test, column_descriptions=column_descriptions
)
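
The anneal_func string is presumably evaluated by the search backend as a Python lambda. A quick local sanity check of what that schedule returns (evaluating the same string; how the tuner consumes the values is not shown here):

anneal_func = eval("lambda x:1*(1/(-(3*(x-1))))")
for x in (0.0, 0.25, 0.5, 0.75):
    print(x, round(anneal_func(x), 3))  # 0.333, 0.444, 0.667, 1.333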
Example #3
import pandas as pd

from hyperflow.estimator.base import HyperFlowEstimator
from hyperflow.hdl.hdl_constructor import HDL_Constructor
from hyperflow.tuner.tuner import Tuner

df = pd.read_csv("../data/QSAR.csv")

hdl_constructor = HDL_Constructor(
    DAG_descriptions={
        "num->var": "compress.variance",
        "var->pea": {
            "_name": "compress.pearson",
            "n_jobs": 6
        },
        "pea->target": "logistic_regression"
    })
tuner = Tuner(run_limit=12, initial_runs=12, search_method="smac")
hyperflow_pipeline = HyperFlowEstimator(tuner, hdl_constructor)
column_descriptions = {"id": "Name", "target": "labels"}

hyperflow_pipeline.fit(X=df, column_descriptions=column_descriptions, n_jobs=3)
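
Example #5 below persists its fitted pipeline with joblib; the same pattern applies here. The file name is illustrative:

import joblib

# Persist the fitted pipeline and reload it later (pattern borrowed from Example #5)
joblib.dump(hyperflow_pipeline, "hyperflow_pipeline_qsar.bz2")
restored = joblib.load("hyperflow_pipeline_qsar.bz2")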
Example #4

import numpy as np
import pandas as pd
from sklearn.datasets import load_boston  # removed in scikit-learn >= 1.2
from sklearn.model_selection import ShuffleSplit

from hyperflow.estimator.base import HyperFlowEstimator
from hyperflow.hdl.hdl_constructor import HDL_Constructor
from hyperflow.tuner.tuner import Tuner
boston = load_boston()
data = boston.data
target = boston.target
columns = list(boston.feature_names) + ["target"]
df = pd.DataFrame(np.hstack([data, target[:, None]]), columns=columns)
ss = ShuffleSplit(n_splits=1, random_state=0, test_size=0.25)
train_ix, test_ix = next(ss.split(df))
df_train = df.iloc[train_ix, :]
df_test = df.iloc[test_ix, :]

tuner = Tuner(
    initial_runs=5,
    run_limit=12,
)
hyperflow_pipeline = HyperFlowEstimator(
    tuner,
    HDL_Constructor(
        DAG_descriptions={
            "num->num": [
                "select.from_model_reg",
                "select.univar_reg",
                "select.rfe_reg"  #,None
            ],
            "num->target": ["lightgbm"]
        }))
column_descriptions = {"target": "target"}
hyperflow_pipeline.fit(X=df_train,
                       X_test=df_test,
                       column_descriptions=column_descriptions,
                       n_jobs=1)
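
Since the Boston target is continuous, a regression metric is the natural check. A sketch assuming a sklearn-style predict() (not confirmed by the snippet):

from sklearn.metrics import r2_score

# Hypothetical evaluation on the held-out split
y_pred = hyperflow_pipeline.predict(df_test)
print("R^2:", r2_score(df_test["target"], y_pred))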
Example #5

import os

import joblib
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

from hyperflow.estimator.base import HyperFlowEstimator
from hyperflow.hdl.hdl_constructor import HDL_Constructor
from hyperflow.tuner.tuner import Tuner
# ResourceManager is also used below; its import path is not shown in the source snippet.

# The snippet starts partway through data_preprocessing(): the CSV load into `df`,
# the construction of `newfeature`, and the enclosing per-column loop are omitted.
        # Mean-impute: replace NaNs in column i with the column mean
        if ind.any():
            temp = newfeature[:, i]
            a = temp[~np.isnan(temp)].mean()
            newfeature[:, i][np.isnan(temp)] = a

    # Standardize features to zero mean and unit variance
    stdScale = StandardScaler().fit(newfeature)
    newfeaturenorm = stdScale.transform(newfeature)

    # Bin the labels into intervals
    bins = [-9, -5, -3, -1, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 24]
    new_range = pd.cut(df.Label, bins)
    newlabel = np.array(df.Label)
    return newfeaturenorm, newlabel, new_range


x_train, y_train, y_range = data_preprocessing()

tuner = Tuner(
    initial_runs=12,
    run_limit=120,
)
hdl_constructor = HDL_Constructor(DAG_descriptions={"num->target": "lightgbm"})
resource_manager = ResourceManager(os.getcwd() + "/for_hxw_result")  # constructed but not passed to the estimator in this snippet
hyperflow_pipeline = HyperFlowEstimator(tuner,
                                        hdl_constructor,
                                        ensemble_builder=False)

hyperflow_pipeline.fit(X=x_train, y=y_train, n_jobs=3)
joblib.dump(hyperflow_pipeline, "hyperflow_pipeline_for_hxw.bz")
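
The dumped pipeline can be restored in a later session; joblib.load mirrors the dump call above:

# Reload the persisted pipeline (same file name as the dump above)
restored = joblib.load("hyperflow_pipeline_for_hxw.bz")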