Example #1
The constructor below wires together the tuner, the HDL constructor, the resource manager, and an optional stacking ensemble builder, filling in defaults for anything not supplied.
def __init__(
        self,
        tuner: Optional[SmacPipelineTuner] = None,  # abstracts the whole optimization process
        hdl_constructor: Optional[HDL_Constructor] = None,  # user-defined initial hyperparameters
        resource_manager: Union[ResourceManager, str, None] = None,
        ensemble_builder: Union[StackEnsembleBuilder, None, bool,
                                int] = None):
    if ensemble_builder is None:
        print("info: using the default stacking ensemble learner")
        ensemble_builder = StackEnsembleBuilder()
    elif ensemble_builder is False:
        print("info: ensemble learning disabled")
    else:
        # an int or a custom model is forwarded as the stacking meta-model
        ensemble_builder = StackEnsembleBuilder(set_model=ensemble_builder)
    self.ensemble_builder = ensemble_builder
    if not tuner:
        tuner = SmacPipelineTuner()
    self.tuner: SmacPipelineTuner = tuner
    if not hdl_constructor:
        hdl_constructor = HDL_Constructor()
    if isinstance(hdl_constructor, dict):
        # todo: use a user-defined hyperparameter description language (HDL)
        print("using a user-defined hyperparameter description language")
    self.hdl_constructor = hdl_constructor
    self.random_state = tuner.random_state
    if isinstance(resource_manager, str):
        # todo: recognize file systems with other protocols, e.g. HDFS
        resource_manager = ResourceManager(resource_manager)
    elif resource_manager is None:
        resource_manager = ResourceManager()
    self.resource_manager = resource_manager
    self.estimator = None
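
A minimal usage sketch of the branches above, assuming this constructor belongs to the AutoPipelineEstimator imported in Example #2 (the class name and import path are taken from there, not confirmed by this snippet):

from autopipeline.estimator.base import AutoPipelineEstimator

est_default = AutoPipelineEstimator()                      # None -> default StackEnsembleBuilder
est_plain = AutoPipelineEstimator(ensemble_builder=False)  # skip ensemble learning
est_top5 = AutoPipelineEstimator(ensemble_builder=5)       # forwarded as StackEnsembleBuilder(set_model=5)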
Example #2
End-to-end usage: split a classification CSV into train and test frames, configure the SMAC tuner's search budget, and fit the estimator with column descriptions.
import pandas as pd
from sklearn.model_selection import ShuffleSplit

from autopipeline.estimator.base import AutoPipelineEstimator
from autopipeline.tuner.smac_tuner import SmacPipelineTuner

df = pd.read_csv("../examples/classification/train_classification.csv")
ss = ShuffleSplit(n_splits=1, random_state=0, test_size=0.25)
train_ix, test_ix = next(ss.split(df))
df_train = df.iloc[train_ix, :]
df_test = df.iloc[test_ix, :]

tuner = SmacPipelineTuner(
    random_state=42,
    initial_runs=5,      # random configurations evaluated before model-based search
    runcount_limit=12,   # total budget of configurations to evaluate
)
auto_pipeline = AutoPipelineEstimator(tuner)
column_descriptions = {
    "id": "PassengerId",   # identifier column, not used as a feature
    "target": "Survived",  # label column
    "ignore": "Name"       # column dropped before training
}
auto_pipeline.fit(X=df_train,
                  X_test=df_test,
                  column_descriptions=column_descriptions,
                  n_jobs=1)
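
Assuming AutoPipelineEstimator follows the scikit-learn estimator convention, a predict call on the held-out frame would round off the example (the predict method is an assumption here; the source only shows fit):

# Assumes a sklearn-style predict(); not shown in the original snippet.
predictions = auto_pipeline.predict(df_test)
print(predictions[:10])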
Example #3
Lower-level walk-through of the tuner internals: sample a concrete configuration from the HDL-derived search space, translate it back into a nested dict (DHP), and assemble the preprocessor and estimator into a pipeline by hand.
import pandas as pd
from pprint import pprint
from sklearn.model_selection import train_test_split

from autopipeline.tuner.smac_tuner import SmacPipelineTuner
# SmacHDL2PHPS, SmacPHP2DHP, constants, HDL and concat_pipeline are assumed to
# be imported from the project; their module paths are not shown in the source.

estimators = []
for i in range(100):
    # HDL -> PHPS: build the SMAC hyperparameter space from the description language
    hdl2phps = SmacHDL2PHPS()
    hdl2phps.set_task(constants.binary_classification_task)
    phps = hdl2phps(HDL)
    # sample one concrete configuration (PHP) from the space
    php = phps.sample_configuration()
    # PHP -> DHP: translate the flat configuration back into a nested dict
    php2dhp = SmacPHP2DHP()
    dhp = php2dhp(php)
    estimators.append(list(dhp["estimator"].keys())[0])
    pprint(dhp)
    break  # inspect only the first sampled configuration

tuner = SmacPipelineTuner()
tuner.set_task(constants.binary_classification_task)
preprocessor = tuner.create_preprocessor(dhp)
estimators = tuner.create_estimator(dhp)  # note: shadows the list built above
pipeline = concat_pipeline(preprocessor, estimators)
print(pipeline)

df = pd.read_csv("../examples/classification/train_classification.csv")
y = df.pop("Survived").values
df = df.loc[:, [
    "Pclass", "Name", "Sex", "Age", "SibSp", "Ticket", "Fare", "Cabin",
    "Embarked"
]]
df_train, df_test, y_train, y_test = train_test_split(
    df, y, test_size=0.2, random_state=10)
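
The snippet ends at the split; a hedged continuation, assuming the assembled pipeline is sklearn-compatible and its sampled preprocessor can digest the raw string columns (Name, Ticket, Cabin), would fit and score it:

# Assumes a sklearn-compatible fit/score; whether the sampled preprocessor
# handles the raw string columns depends on the sampled configuration.
pipeline.fit(df_train, y_train)
print("test accuracy:", pipeline.score(df_test, y_test))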