Example #1
 def setUp(self) -> None:
     super(TestScaler, self).setUp()
     boston = load_boston()
     np.random.seed(10)
     ix = np.random.permutation(boston.data.shape[0])
     L = int(len(ix) * 0.8)
     train_ix = ix[:L]
     test_ix = ix[L:]
     X_train = pd.DataFrame(boston.data[train_ix, :],
                            columns=boston.feature_names)
     y_train = boston.target[train_ix]
     X_test = pd.DataFrame(boston.data[test_ix, :],
                           columns=boston.feature_names)
     y_test = boston.target[test_ix]
     pipe = AutoFlowRegressor(
         # consider_ordinal_as_cat=True,
         resource_manager=self.mock_resource_manager)
     pipe.fit(
         X_train=X_train,
         y_train=y_train,
         X_test=X_test,
         y_test=y_test,
         is_not_realy_run=True,
     )
     self.X_train = pipe.data_manager.X_train
     self.X_train.index = train_ix
     self.y_train = pipe.data_manager.y_train
     self.X_test = pipe.data_manager.X_test
     self.X_test.index = test_ix
     self.y_test = pipe.data_manager.y_test
     self.index = deepcopy(train_ix)
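The fixture above rebuilds an 80/20 split by permuting the Boston row indices by hand so the original indices can be reattached after the data manager re-indexes the frames. For reference, a minimal sketch of the same split done with train_test_split (the approach the later examples use); it assumes a scikit-learn version that still ships load_boston, which has been removed from recent releases.

import pandas as pd
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split

boston = load_boston()
X = pd.DataFrame(boston.data, columns=boston.feature_names)
# 80/20 split, mirroring the manual permutation above (fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X, boston.target, test_size=0.2, random_state=10)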
Example #2
 def test_single_regressor_with_X_test(self):
     X, y = load_boston(return_X_y=True)
     X_train, X_test, y_train, y_test = train_test_split(X, y)
     pipe = AutoFlowRegressor(DAG_workflow={"num->target": ["elasticnet"]},
                              initial_runs=1,
                              run_limit=1,
                              debug=True,
                              resource_manager=self.mock_resource_manager)
     pipe.fit(X_train, y_train, X_test, y_test)
     # score = accuracy_score(y_test, y_pred)
     score = pipe.score(X_test, y_test)
     print(score)
     self.assertGreater(score, 0)
     pipe.resource_manager.init_trial_table()
     trial = pipe.resource_manager.TrialModel
     records = trial.select().where(
         trial.experiment_id == pipe.experiment_id)
     for record in records:
         self.assertTrue(record is not None)
         self.assertTrue(
             isinstance(record.test_all_score, dict)
             and bool(record.test_all_score)
             # and record.test_all_score["r2"] > 0
         )
         # print(record.test_all_score["r2"])
     pipe.resource_manager.close_trial_table()
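Because X_test and y_test were passed to fit, each trial row stores its hold-out metrics in test_all_score. A brief hedged sketch of reading one metric back out of the trial table; the "r2" key is an assumption taken from the commented-out lines above, not something confirmed elsewhere in these examples.

pipe.resource_manager.init_trial_table()
trial = pipe.resource_manager.TrialModel
for record in trial.select().where(trial.experiment_id == pipe.experiment_id):
    # "r2" key assumed from the commented-out lines in the test above
    print("test r2:", record.test_all_score.get("r2"))
pipe.resource_manager.close_trial_table()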
Example #3
 def test_single_regressor(self):
     X, y = load_boston(return_X_y=True)
     X_train, X_test, y_train, y_test = train_test_split(X, y)
     pipe = AutoFlowRegressor(DAG_workflow={"num->target": ["elasticnet"]},
                              initial_runs=1,
                              run_limit=1,
                              debug=True,
                              resource_manager=self.mock_resource_manager)
     pipe.fit(X_train, y_train)
     # score = accuracy_score(y_test, y_pred)
     score = pipe.score(X_test, y_test)
     print(score)
     self.assertGreater(score, 0)
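Here nothing from the test split is passed to fit, so pipe.score(X_test, y_test) is the only hold-out evaluation. The leftover accuracy_score comment hints at the usual predict-then-score pattern; the sketch below assumes AutoFlowRegressor also exposes a scikit-learn-style predict method (only score is shown in these examples).

from sklearn.metrics import r2_score

y_pred = pipe.predict(X_test)  # assumption: sklearn-compatible predict
print("test r2:", r2_score(y_test, y_pred))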
Example #4
 def test_ensemble_regressors(self):
     X, y = load_boston(return_X_y=True)
     X_train, X_test, y_train, y_test = train_test_split(X, y)
     pipe = AutoFlowRegressor(
         DAG_workflow={
             "num->scaled": ["scale.standardize"],
             "scaled->target": ["elasticnet"]
         },
         initial_runs=2,
         run_limit=2,
         n_jobs=2,
         resource_manager=self.mock_resource_manager,
         debug=True,
     )
     pipe.fit(X_train,
              y_train,
              splitter=ShuffleSplit(n_splits=2,
                                    test_size=0.2,
                                    random_state=42))
     score = pipe.score(X_test, y_test)
     print(score)
     assert score > 0.5
     for splitter in [
             # LeaveOneOut(),
             ShuffleSplit(n_splits=20, test_size=0.3, random_state=42),
             KFold()
     ]:
         pipe.fit(X_train, y_train, splitter=splitter)
         score = pipe.score(X_test, y_test)
         assert score > 0.5
         print("splitter:", splitter)
         print("test r2:", score)
Example #5
import os

import joblib
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold

from autoflow import AutoFlowRegressor

train_df = pd.read_csv("../data/train_regression.csv")
train_df.replace("NA", np.nan, inplace=True)
test_df = pd.read_csv("../data/test_regression.csv")
test_df.replace("NA", np.nan, inplace=True)
trained_pipeline = AutoFlowRegressor(initial_runs=5,
                                     run_limit=10,
                                     n_jobs=1,
                                     included_regressors=["lightgbm"],
                                     per_run_time_limit=60)
column_descriptions = {
    "id": "Id",
    "target": "SalePrice",
}
if not os.path.exists("autoflow_regression.bz2"):
    trained_pipeline.fit(X_train=train_df,
                         X_test=test_df,
                         column_descriptions=column_descriptions,
                         splitter=KFold(n_splits=3,
                                        shuffle=True,
                                        random_state=42),
                         fit_ensemble_params=False)
    # if you want to see the workflow space AutoFlow is searching, you can use `draw_workflow_space` to visualize it
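    # Hedged continuation sketch: the snippet is truncated here. The joblib import
    # and the os.path.exists("autoflow_regression.bz2") guard above suggest the
    # fitted pipeline is cached to disk and reloaded on later runs; the exact
    # dump/load calls below are assumptions, not part of the original example.
    joblib.dump(trained_pipeline, "autoflow_regression.bz2")
else:
    trained_pipeline = joblib.load("autoflow_regression.bz2")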