# Example script: fit a HyperFlow pipeline on the QSAR table using a
# three-step DAG (compress.variance -> compress.pearson -> logistic_regression).
import pandas as pd

from hyperflow.estimator.base import HyperFlowEstimator
from hyperflow.hdl.hdl_constructor import HDL_Constructor
from hyperflow.tuner.tuner import Tuner

# The path is relative, so it is resolved against the current working directory.
qsar_csv_path = "../data/QSAR.csv"
df = pd.read_csv(qsar_csv_path)

# Keys have the form "<from>-><to>"; a value is either a component name or a
# dict whose "_name" entry presumably names the component, with the remaining
# entries passed along as settings (here n_jobs=6 for compress.pearson).
pearson_step = {
    "_name": "compress.pearson",
    "n_jobs": 6,
}
dag_descriptions = {
    "num->var": "compress.variance",
    "var->pea": pearson_step,
    "pea->target": "logistic_regression",
}
hdl_constructor = HDL_Constructor(DAG_descriptions=dag_descriptions)

# run_limit equals initial_runs (12 each), searched with the "smac" method.
tuner = Tuner(
    run_limit=12,
    initial_runs=12,
    search_method="smac",
)
hyperflow_pipeline = HyperFlowEstimator(tuner, hdl_constructor)

# "Name" is declared as the id column and "labels" as the target column.
column_descriptions = {
    "id": "Name",
    "target": "labels",
}
hyperflow_pipeline.fit(
    X=df,
    column_descriptions=column_descriptions,
    n_jobs=3,
)
from hyperflow.tuner.tuner import Tuner

# Example script: fit a HyperFlow pipeline on a classification table with a
# single 75/25 shuffled hold-out split.
# NOTE(review): pd, ShuffleSplit, HDL_Constructor and HyperFlowEstimator are
# used below but their imports are not visible in this part of the file;
# presumably they are imported earlier -- confirm.

train_csv_path = "../examples/classification/train_classification.csv"
df = pd.read_csv(train_csv_path)

# One split only, with a fixed seed so the partition is reproducible.
ss = ShuffleSplit(n_splits=1, random_state=0, test_size=0.25)
train_ix, test_ix = next(ss.split(df))
df_train = df.iloc[train_ix, :]
df_test = df.iloc[test_ix, :]

tuner = Tuner(
    initial_runs=5,
    run_limit=12,
)

# Keys have the form "<from>-><to>"; values name the component(s) for that
# step. Several values chain two names with "|".
dag_descriptions = {
    "nan->imp": "impute.fill_abnormal",
    "imp->{cat_name=cat,num_name=num}": "operate.split.cat_num",
    "cat->num": "encode.cat_boost|scale.standardize",
    "num->target": "reduce.pca|lightgbm",
}
hdl_constructor = HDL_Constructor(DAG_descriptions=dag_descriptions)
hyperflow_pipeline = HyperFlowEstimator(tuner, hdl_constructor)

# Column roles handed to fit(): id column, target column, and a column to ignore.
column_descriptions = {
    "id": "PassengerId",
    "target": "Survived",
    "ignore": "Name",
}
hyperflow_pipeline.fit(
    X=df_train,
    X_test=df_test,
    column_descriptions=column_descriptions,
    n_jobs=1,
)
# NOTE(review): everything down to the `return` is the tail of a function whose
# header is outside this view (presumably data_preprocessing(), which is called
# right after it). `ind`, `i`, `newfeature` and `df` are defined in the missing
# part; the nesting depth used below is an assumption -- confirm against the
# full file.
        if ind.any():
            # Replace the NaN entries of column i with the mean of its non-NaN entries.
            temp = newfeature[:, i]
            a = temp[~np.isnan(temp)].mean()
            newfeature[:, i][np.isnan(temp)] = a
    # Standardize: fit a StandardScaler on the features and transform them.
    stdScale = StandardScaler().fit(newfeature)
    newfeaturenorm = stdScale.transform(newfeature)
    # Binning: cut df.Label into intervals using the bin edges below.
    bins = [-9, -5, -3, -1, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 24]
    new_range = pd.cut(df.Label, bins)
    # The raw (un-binned) labels are what is returned as the target array.
    newlabel = np.array(df.Label)
    return newfeaturenorm, newlabel, new_range


# NOTE(review): y_range is unpacked here but not used again in the visible code.
x_train, y_train, y_range = data_preprocessing()
tuner = Tuner(
    initial_runs=12,
    run_limit=120,
)
# Single-step DAG: the "num" features go straight to a lightgbm target step.
hdl_constructor = HDL_Constructor(DAG_descriptions={"num->target": "lightgbm"})
# NOTE(review): resource_manager is constructed but never passed to
# HyperFlowEstimator (or used anywhere else) in the visible code -- confirm
# whether it was meant to be handed to the estimator.
resource_manager = ResourceManager(os.getcwd() + "/for_hxw_result")
hyperflow_pipeline = HyperFlowEstimator(tuner, hdl_constructor, ensemble_builder=False)
hyperflow_pipeline.fit(X=x_train, y=y_train, n_jobs=3)
# Persist the fitted estimator object to disk.
# NOTE(review): joblib appears to pick a compressor only for the extensions
# .z/.gz/.bz2/.xz/.lzma, so ".bz" is presumably written uncompressed -- confirm.
joblib.dump(hyperflow_pipeline, "hyperflow_pipeline_for_hxw.bz")