def test_regression_pandas_support(tmp_dir, dask_client):
    """Fit, score and refit an AutoSklearnRegressor on pandas inputs.

    The data is fetched from OpenML as a DataFrame / Series pair, and the
    very same data is used both for training and for scoring.
    """
    features, target = sklearn.datasets.fetch_openml(
        data_id=41514,  # diabetes
        return_X_y=True,
        as_frame=True,
    )
    # The test is only meaningful when the inputs really are pandas objects.
    assert isinstance(features, pd.DataFrame)
    assert isinstance(target, pd.Series)

    regressor_options = dict(
        time_left_for_this_task=40,
        per_run_time_limit=5,
        dask_client=dask_client,
        tmp_folder=tmp_dir,
    )
    regressor = AutoSklearnRegressor(**regressor_options)

    # Fit directly on the pandas objects; the call is not wrapped in any
    # raises-check, so it is expected to complete without an error.
    regressor.fit(features, target)

    # Scored on the training data itself, so this is a smoke check of the
    # code path rather than an estimate of generalisation.
    training_score = regressor.score(features, target)
    assert training_score >= 0.5, print_debug_information(regressor)

    regressor.refit(features, target)

    # After the refit the predictions must still clear the same bar.
    refit_predictions = regressor.predict(features)
    assert r2(target, refit_predictions) > 0.5, \
        print_debug_information(regressor)

    successful_runs = count_succeses(regressor.cv_results_)
    assert successful_runs > 0, print_debug_information(regressor)
Example #2
0
    def regression(self, metric="r2"):
        """
        Perform auto_regression.

        Builds an AutoSklearnRegressor from ``self.auto_sklearn_kwargs``, fits
        it on ``self._X_train`` / ``self._y_train``, prints the training and
        test scores, appends the same report to
        ``best_auto_sklearn_output.log`` and pickles the regressor to
        ``automl_regressor.dump.pkl``; both files live in the configured
        ``output_folder``.

        Args:
            metric (str): The evaluation metric of regression.
                 This will be mapped by AutoSklearnML.get_regression_metric
                 to an instance of :class:`autosklearn.metrics.Scorer` as
                 created by :meth:`autosklearn.metrics.make_scorer`.
                 Default metric: "r2".
                 Other supported metrics: "mean_squared_error",
                                          "mean_absolute_error",
                                          "median_absolute_error"

        Returns:
            The fitted AutoSklearnRegressor (additionally refitted on the
            training data when the resampling strategy is "cv").

        Raises:
            KeyError: If ``self.auto_sklearn_kwargs`` has no
                ``'output_folder'`` entry.
        """
        auto_regressor = AutoSklearnRegressor(**self.auto_sklearn_kwargs)
        regression_metric = AutoSklearnML.get_regression_metric(metric)
        auto_regressor.fit(self._X_train,
                           self._y_train,
                           metric=regression_metric,
                           dataset_name=self.dataset_name)
        print(auto_regressor.show_models())

        # A missing "resampling_strategy" entry means the regressor was built
        # with its own default, so treat it as "not cv" instead of failing
        # with a KeyError after the (expensive) fit has already finished.
        if self.auto_sklearn_kwargs.get("resampling_strategy") == "cv":
            auto_regressor.refit(self._X_train.copy(), self._y_train.copy())

        # Each score is computed once and the formatted line is reused for
        # both the console output and the log file.
        prediction_train = auto_regressor.predict(self._X_train)
        score = regression_metric._score_func
        train_report = "training set {} score: {}".format(
            metric, score(self._y_train, prediction_train))
        print(train_report)

        prediction_test = auto_regressor.predict(self._X_test)
        test_report = "test set {} score: {}".format(
            metric, score(self._y_test, prediction_test))
        print(test_report)

        output_folder = self.auto_sklearn_kwargs['output_folder']

        # Append mode: reports of earlier runs in the same folder are kept.
        log_file = os.path.join(output_folder, 'best_auto_sklearn_output.log')
        with open(log_file, 'a+') as wf:
            wf.write('The best model is : \n')
            wf.write(auto_regressor.show_models())
            wf.write("\n" + train_report + "\n")
            wf.write('\n')
            wf.write(test_report)

        dump_file = os.path.join(output_folder, 'automl_regressor.dump.pkl')

        with open(dump_file, 'wb') as f:
            pickle.dump(auto_regressor, f)

        return auto_regressor
def test_autosklearn_regression_methods_returns_self(dask_client):
    """Check that fit, fit_ensemble and refit each return the estimator itself."""
    # Only the training split is needed; the test split is discarded.
    X_train, y_train, _, _ = putil.get_dataset('boston')

    regressor_options = {
        'time_left_for_this_task': 30,
        'per_run_time_limit': 5,
        'dask_client': dask_client,
        'ensemble_size': 0,
    }
    regressor = AutoSklearnRegressor(**regressor_options)

    returned_by_fit = regressor.fit(X_train, y_train)
    assert regressor is returned_by_fit

    returned_by_fit_ensemble = regressor.fit_ensemble(y_train, ensemble_size=5)
    assert regressor is returned_by_fit_ensemble

    # Copies are handed over so the refit works on its own data objects.
    returned_by_refit = regressor.refit(X_train.copy(), y_train.copy())
    assert regressor is returned_by_refit