Ejemplo n.º 1
0
def test_regression_pandas_support(tmp_dir, output_dir, dask_client):
    """Fit, score and refit an AutoSklearnRegressor on pandas inputs.

    The data is fetched from OpenML as a DataFrame / Series pair and handed
    to the regressor without any conversion. ``tmp_dir``, ``output_dir`` and
    ``dask_client`` are forwarded to the regressor as ``tmp_folder``,
    ``output_folder`` and ``dask_client``.
    """
    features, target = sklearn.datasets.fetch_openml(
        data_id=41514,  # diabetes
        as_frame=True,
        return_X_y=True,
    )

    # The test is only meaningful when the inputs really are pandas objects.
    assert isinstance(features, pd.DataFrame)
    assert isinstance(target, pd.Series)

    regressor = AutoSklearnRegressor(
        tmp_folder=tmp_dir,
        output_folder=output_dir,
        dask_client=dask_client,
        time_left_for_this_task=40,
        per_run_time_limit=5,
    )

    # Fitting must succeed on the raw pandas input; an exception here fails
    # the test.
    regressor.fit(features, target)

    # Scored on the very data used for fitting, so this is a smoke test of
    # the pipeline rather than a measure of generalization.
    train_score = regressor.score(features, target)
    assert train_score >= 0.5, print_debug_information(regressor)

    regressor.refit(features, target)

    # The same sanity threshold must hold after the refit.
    refit_r2 = r2(target, regressor.predict(features))
    assert refit_r2 > 0.5, print_debug_information(regressor)

    # At least one evaluated configuration must have finished successfully.
    n_successful = count_succeses(regressor.cv_results_)
    assert n_successful > 0, print_debug_information(regressor)
Ejemplo n.º 2
0
def train_autosklearn(l=None):
    """Train an AutoSklearnRegressor with a fixed configuration.

    Args:
        l: Data container exposing ``X_train`` and ``y_train`` attributes,
            each of which has a ``.values`` attribute (presumably pandas
            objects -- confirm against ``get_data``). When ``None``, the
            container is obtained from ``get_data()``.

    Returns:
        Whatever ``attributedict_from_locals('model')`` returns; presumably
        a mapping exposing the fitted regressor under the name ``model`` --
        confirm against that helper.
    """
    if l is None:
        l = get_data()
    # Both settings are pinned to their "vanilla" values; the numbers in the
    # trailing comments (50 / 25) are the non-vanilla alternatives.
    ensemble_size = 1  # 50 ... 1 for vanilla
    initial_configurations_via_metalearning = 0  # 25 ... 0 for vanilla
    model = AutoSklearnRegressor(
        delete_output_folder_after_terminate=True,
        delete_tmp_folder_after_terminate=True,
        disable_evaluator_output=False,
        ensemble_nbest=50,
        ensemble_size=ensemble_size,
        exclude_estimators=None,
        exclude_preprocessors=None,
        get_smac_object_callback=None,
        include_estimators=None,
        include_preprocessors=None,
        initial_configurations_via_metalearning=
        initial_configurations_via_metalearning,
        logging_config=None,
        ml_memory_limit=3072,
        output_folder=None,
        per_run_time_limit=360,
        resampling_strategy='cv',
        resampling_strategy_arguments={'folds': 5},
        # resampling_strategy='holdout',
        # resampling_strategy_arguments=None,
        seed=1,
        shared_mode=False,
        smac_scenario_args=None,
        time_left_for_this_task=3600,
        tmp_folder=None)
    # Both calls receive fresh copies of the training arrays; the targets are
    # additionally passed through ``squeeze()``.
    model.fit(l.X_train.values.copy(), l.y_train.values.squeeze().copy())
    # NOTE(review): the refit on the full training data is presumably needed
    # because the search above uses resampling_strategy='cv' -- confirm
    # against the auto-sklearn documentation for the installed version.
    model.refit(l.X_train.values.copy(), l.y_train.values.squeeze().copy())
    print(model.show_models())
    # The helper is given the *name* of the local variable, so the local must
    # stay called ``model``.
    return attributedict_from_locals('model')
Ejemplo n.º 3
0
    def test_regression_pandas_support(self):
        """Fit, score and refit an AutoSklearnRegressor on pandas inputs.

        The data is fetched from OpenML as a DataFrame / Series pair and is
        passed to the regressor without any conversion.
        """
        X, y = sklearn.datasets.fetch_openml(
            data_id=41514,  # diabetes
            return_X_y=True,
            as_frame=True,
        )
        # This test only makes sense if the inputs are pandas objects.
        # assertIsInstance reports the actual type on failure, unlike
        # assertTrue(isinstance(...)).
        self.assertIsInstance(X, pd.DataFrame)
        self.assertIsInstance(y, pd.Series)
        automl = AutoSklearnRegressor(
            time_left_for_this_task=30,
            per_run_time_limit=5,
        )

        # Fitting must succeed on the raw pandas input; an exception here
        # fails the test.
        automl.fit(X, y)

        # Scored on the very data used for fitting, so this is a smoke test
        # of the pipeline rather than a measure of generalization.
        # assertGreater shows the achieved score when the check fails.
        self.assertGreater(automl.score(X, y), 0.5)

        automl.refit(X, y)

        # The same sanity threshold must hold after the refit.
        self.assertGreater(r2(y, automl.predict(X)), 0.5)
Ejemplo n.º 4
0
 def test_conversion_of_list_to_np(self, fit_ensemble, refit, fit):
     """Check that list inputs reach the recorded calls as numpy arrays.

     ``fit``, ``refit`` and ``fit_ensemble`` are call-recording stand-ins
     (the test reads their ``call_count`` and ``call_args``); presumably
     they are injected by patch decorators outside this view.
     """
     features = [[1], [2], [3]]
     targets = [1, 2, 3]
     regressor = AutoSklearnRegressor()

     # fit: both positional arguments must have been converted.
     regressor.fit(features, targets)
     self.assertEqual(fit.call_count, 1)
     for position in (0, 1):
         self.assertIsInstance(fit.call_args[0][position], np.ndarray)

     # refit: same expectation for both positional arguments.
     regressor.refit(features, targets)
     self.assertEqual(refit.call_count, 1)
     for position in (0, 1):
         self.assertIsInstance(refit.call_args[0][position], np.ndarray)

     # fit_ensemble only receives the targets.
     regressor.fit_ensemble(targets)
     self.assertEqual(fit_ensemble.call_count, 1)
     self.assertIsInstance(fit_ensemble.call_args[0][0], np.ndarray)
Ejemplo n.º 5
0
 def test_conversion_of_list_to_np(self, fit_ensemble, refit, fit):
     """Check that list inputs reach the recorded calls as numpy arrays.

     ``fit``, ``refit`` and ``fit_ensemble`` are call-recording stand-ins
     (the test reads their ``call_count`` and ``call_args``); presumably
     they are injected by patch decorators outside this view.
     """
     features = [[1], [2], [3]]
     targets = [1, 2, 3]
     regressor = AutoSklearnRegressor()

     # fit: both positional arguments must have been converted.
     regressor.fit(features, targets)
     self.assertEqual(fit.call_count, 1)
     for position in (0, 1):
         self.assertIsInstance(fit.call_args[0][position], np.ndarray)

     # refit: same expectation for both positional arguments.
     regressor.refit(features, targets)
     self.assertEqual(refit.call_count, 1)
     for position in (0, 1):
         self.assertIsInstance(refit.call_args[0][position], np.ndarray)

     # fit_ensemble only receives the targets.
     regressor.fit_ensemble(targets)
     self.assertEqual(fit_ensemble.call_count, 1)
     self.assertIsInstance(fit_ensemble.call_args[0][0], np.ndarray)
Ejemplo n.º 6
0
class AutoML(AbstractModel):
    """Model wrapper that searches and refits an ``AutoSklearnRegressor``."""

    def __init__(self):
        super().__init__()
        # The estimator class itself (not an instance) is stored here.
        self.model = AutoSklearnRegressor

    def fit(self, x, y, modeldict=None):
        """Refit the searched model on ``x`` and ``y``.

        ``param_search`` is run first when ``self.m`` is falsy.
        ``modeldict`` is accepted but not used by this method.
        """
        search_needed = not self.m
        if search_needed:
            self.param_search(x, y)
        self.m.refit(x, y)

    def param_search(self, x, y, time_per_sample=3.5, **kwargs):
        """Run the auto-sklearn search, then score it with a 10-fold split.

        The search budget passed as ``time_left_for_this_task`` is
        ``len(y) * time_per_sample`` truncated to an int. After the search,
        the model is refit on each training split of a shuffled 10-fold
        ``KFold`` and scored on the matching test split with
        ``self.score_function``, which is unpacked as (rmse, mae, r2).
        ``x`` and ``y`` are indexed with the fold index arrays. ``kwargs``
        is accepted but not used.

        Note that ``self.m`` is left refit on the last fold's training split
        when this method returns.

        Returns:
            ``self.concat_results`` applied to the mean RMSE, mean MAE and
            mean R2 over the folds.
        """
        search_budget = int(len(y) * time_per_sample)
        self.m = AutoSklearnRegressor(
            resampling_strategy="cv",
            resampling_strategy_arguments={'folds': 10},
            time_left_for_this_task=search_budget,
        )
        # NOTE(review): confirm that ``mean_squared_error`` is the metric
        # object auto-sklearn expects and not the plain scikit-learn function.
        self.m.fit(
            x,
            y,
            metric=mean_squared_error,
            dataset_name="Land Use Regression",
        )

        splitter = KFold(n_splits=10, shuffle=True)
        fold_scores = []
        for train_index, test_index in splitter.split(x, y):
            X_train, y_train = x[train_index], y[train_index]
            X_test, y_test = x[test_index], y[test_index]
            self.m.refit(X_train, y_train)
            rmse_fold, mae_fold, r2_fold = self.score_function(
                y_test, self.m.predict(X_test))
            fold_scores.append((rmse_fold, mae_fold, r2_fold))

        mean_rmse = np.mean([scores[0] for scores in fold_scores])
        mean_mae = np.mean([scores[1] for scores in fold_scores])
        mean_r2 = np.mean([scores[2] for scores in fold_scores])
        return self.concat_results(mean_rmse, mean_mae, mean_r2)
Ejemplo n.º 7
0
    def test_regression_methods_returns_self(self):
        """Check that fit, fit_ensemble and refit each return the estimator."""
        # Only the training split is needed; the test split is discarded.
        X_train, y_train, _, _ = putil.get_dataset('boston')
        regressor = AutoSklearnRegressor(ensemble_size=0,
                                         per_run_time_limit=5,
                                         time_left_for_this_task=20)

        self.assertIs(regressor, regressor.fit(X_train, y_train))

        # The regressor is created with ensemble_size=0; the ensemble is
        # requested explicitly here with size 5.
        self.assertIs(regressor,
                      regressor.fit_ensemble(y_train, ensemble_size=5))

        # refit is given copies of the training data.
        self.assertIs(regressor,
                      regressor.refit(X_train.copy(), y_train.copy()))
Ejemplo n.º 8
0
    def test_regression_methods_returns_self(self):
        """Check that fit, fit_ensemble and refit each return the estimator."""
        # Only the training split is needed; the test split is discarded.
        X_train, y_train, _, _ = putil.get_dataset('boston')
        regressor = AutoSklearnRegressor(ensemble_size=0,
                                         per_run_time_limit=5,
                                         time_left_for_this_task=20)

        self.assertIs(regressor, regressor.fit(X_train, y_train))

        # The regressor is created with ensemble_size=0; the ensemble is
        # requested explicitly here with size 5.
        self.assertIs(regressor,
                      regressor.fit_ensemble(y_train, ensemble_size=5))

        # refit is given copies of the training data.
        self.assertIs(regressor,
                      regressor.refit(X_train.copy(), y_train.copy()))
Ejemplo n.º 9
0
def test_autosklearn_regression_methods_returns_self(dask_client):
    """Check that fit, fit_ensemble and refit each return the estimator.

    ``dask_client`` is forwarded to the regressor unchanged.
    """
    # Only the training split is needed; the test split is discarded.
    X_train, y_train, _, _ = putil.get_dataset('boston')
    regressor = AutoSklearnRegressor(ensemble_size=0,
                                     dask_client=dask_client,
                                     per_run_time_limit=5,
                                     time_left_for_this_task=30)

    fit_result = regressor.fit(X_train, y_train)
    assert regressor is fit_result

    # The regressor is created with ensemble_size=0; the ensemble is
    # requested explicitly here with size 5.
    ensemble_result = regressor.fit_ensemble(y_train, ensemble_size=5)
    assert regressor is ensemble_result

    # refit is given copies of the training data.
    refit_result = regressor.refit(X_train.copy(), y_train.copy())
    assert regressor is refit_result
Ejemplo n.º 10
0
#-----REGRESSION-----
# Search for a regression model with auto-sklearn using 5-fold
# cross-validation, refit it on the training data, then print the selected
# models and the run statistics. X_train, y_train and X_test are expected to
# be defined before this point.
automl = AutoSklearnRegressor(
    per_run_time_limit=360,
    ml_memory_limit=1024 * 8,
    time_left_for_this_task=3600,
    resampling_strategy='cv',
    #                              ensemble_size=1,
    #                              initial_configurations_via_metalearning=0,
    resampling_strategy_arguments={'folds': 5})
# Timer start; the elapsed time printed below covers both fit and refit.
start = time.time()

#X_train = X_train.astype('float') # when?
automl.fit(X_train, y_train,
           dataset_name='boston_housing')  #change dataset name accordingly
# NOTE(review): the refit on copies of the training data is presumably
# required because resampling_strategy='cv' is used -- confirm against the
# auto-sklearn documentation for the installed version.
automl.refit(X_train.copy(), y_train.copy())
print(
    '[INFO] Elapsed time finding best model: {} seconds.'.format(time.time() -
                                                                 start))

# The predictions are computed but not evaluated in this section.
predictions = automl.predict(X_test)
#print('--- CLASSIFICATION REPORT: ---')        #not for regression
#print(classification_report(y_test, predictions, digits=5))
print('\n\n--- MODELS: ---')
print(automl.show_models())
print('\n\n--- STATISTICS: ---')
print(automl.sprint_statistics())

#-----CLASSIFIER-----
# Classifier-only scoring, left disabled for the regression run.
#print('\n\n--- SCORE: ---')
#print("Balanced error score", 1 - balanced_accuracy_score(y_test, predictions))
Ejemplo n.º 11
0
# Restrict the search to the single "no_preprocessing" preprocessor.
# ``estimators_to_use`` (passed below) is expected to be defined before this
# point.
preprocessing_to_use = ["no_preprocessing"]

# Init auto-sklearn
# NOTE(review): per_run_time_limit (360) is larger than
# time_left_for_this_task (60 * 5 = 300) -- confirm this is intended.
auto_sklearn = AutoSklearnRegressor(time_left_for_this_task=60 * 5,
                                    per_run_time_limit=360,
                                    include_estimators=estimators_to_use,
                                    exclude_estimators=None,
                                    include_preprocessors=preprocessing_to_use,
                                    exclude_preprocessors=None,
                                    ml_memory_limit=6156,
                                    resampling_strategy="cv",
                                    resampling_strategy_arguments={"folds": 5})

# Train models
# Both calls receive copies of the training data.
# NOTE(review): confirm that ``mean_squared_error`` is the metric object
# auto-sklearn expects and not the plain scikit-learn function.
auto_sklearn.fit(X=X_train.copy(), y=y_train.copy(), metric=mean_squared_error)
# ``it_fits`` holds the return value of refit.
it_fits = auto_sklearn.refit(X=X_train.copy(), y=y_train.copy())

# Predict
y_hat = auto_sklearn.predict(X_test)

# Show results
# NOTE(review): these are bare expressions, so their values are discarded
# when the file runs as a script; they are only displayed in an interactive
# session -- confirm the intended way of running this.
auto_sklearn.cv_results_
auto_sklearn.sprint_statistics()
auto_sklearn.show_models()
auto_sklearn.get_models_with_weights()

# TPOT

from tpot import TPOTRegressor

tpot_config = {