def test_regression_pandas_support(tmp_dir, dask_client):
    """Check that AutoSklearnRegressor fits, scores, refits and predicts
    when the inputs are a pandas DataFrame / Series pair."""
    features, target = sklearn.datasets.fetch_openml(
        data_id=41514,  # diabetes
        return_X_y=True,
        as_frame=True,
    )

    # The test is only meaningful when the inputs really are pandas objects.
    assert isinstance(features, pd.DataFrame)
    assert isinstance(target, pd.Series)

    automl = AutoSklearnRegressor(
        time_left_for_this_task=40,
        per_run_time_limit=5,
        dask_client=dask_client,
        tmp_folder=tmp_dir,
    )

    # Fitting directly on the DataFrame / Series pair is expected to succeed.
    automl.fit(features, target)

    # Scored on the very data used for fitting: this exercises the code path
    # rather than measuring generalisation, so only a loose bound is required.
    fit_score = automl.score(features, target)
    assert fit_score >= 0.5, print_debug_information(automl)

    automl.refit(features, target)

    # Predictions after the refit must still clear the same loose bound.
    predictions = automl.predict(features)
    assert r2(target, predictions) > 0.5, print_debug_information(automl)

    # At least one run recorded in cv_results_ has to count as successful.
    n_successes = count_succeses(automl.cv_results_)
    assert n_successes > 0, print_debug_information(automl)
def regression(self, metric="r2"):
    """
    Perform auto_regression.

    Builds an ``AutoSklearnRegressor`` from ``self.auto_sklearn_kwargs``, fits
    it on the training split, prints the training/test scores, appends the
    same report to ``best_auto_sklearn_output.log`` and pickles the regressor
    to ``automl_regressor.dump.pkl``. Both files are placed in
    ``self.auto_sklearn_kwargs['output_folder']``.

    Args:
        metric (str): The evaluation metric of regression. This will be
            mapped by AutoSklearnML.get_regression_metric to an instance of
            :class:`autosklearn.metrics.Scorer` as created by
            :meth:`autosklearn.metrics.make_scorer`.
            Default metric: "r2".
            Other supported metrics: "mean_squared_error",
            "mean_absolute_error", "median_absolute_error"

    Returns:
        The ``AutoSklearnRegressor`` instance after fitting (and after
        ``refit`` when the configured resampling strategy is ``"cv"``).

    Raises:
        KeyError: If ``self.auto_sklearn_kwargs`` has no ``'output_folder'``
            entry. This is checked before fitting so that a missing output
            location does not surface only after the whole fit has run.
    """
    # Read the output location first: fail before the fit, not after it.
    output_folder = self.auto_sklearn_kwargs['output_folder']

    auto_regressor = AutoSklearnRegressor(**self.auto_sklearn_kwargs)
    regression_metric = AutoSklearnML.get_regression_metric(metric)
    auto_regressor.fit(self._X_train, self._y_train,
                       metric=regression_metric,
                       dataset_name=self.dataset_name)
    print(auto_regressor.show_models())

    # "resampling_strategy" is optional in the kwargs; when it is absent no
    # refit is requested, so use .get() instead of indexing (which raised
    # KeyError). Copies are passed so refit works on its own data objects.
    if self.auto_sklearn_kwargs.get("resampling_strategy") == "cv":
        auto_regressor.refit(self._X_train.copy(), self._y_train.copy())

    # NOTE(review): `_score_func` is a private attribute of the scorer; it is
    # kept so the reported values stay exactly what they were - confirm
    # whether a public equivalent exists.
    score_func = regression_metric._score_func

    # Each score is computed once and reused for both stdout and the log.
    prediction_train = auto_regressor.predict(self._X_train)
    train_report = "training set {} score: {}".format(
        metric, score_func(self._y_train, prediction_train))
    print(train_report)

    prediction_test = auto_regressor.predict(self._X_test)
    test_report = "test set {} score: {}".format(
        metric, score_func(self._y_test, prediction_test))
    print(test_report)

    log_path = os.path.join(output_folder, 'best_auto_sklearn_output.log')
    with open(log_path, 'a+') as wf:
        wf.write('The best model is : \n')
        # Queried again instead of reusing the earlier string, because
        # refit() may have run in between.
        wf.write(auto_regressor.show_models())
        wf.write("\n" + train_report + "\n")
        wf.write('\n')
        wf.write(test_report)

    dump_file = os.path.join(output_folder, 'automl_regressor.dump.pkl')
    with open(dump_file, 'wb') as f:
        pickle.dump(auto_regressor, f)

    return auto_regressor
def test_autosklearn_regression_methods_returns_self(dask_client):
    """Check that fit, fit_ensemble and refit each hand back the estimator
    they were called on."""
    # Only the training split is used; the test split is discarded.
    train_features, train_targets, _, _ = putil.get_dataset('boston')

    # Constructed with ensemble_size=0; an ensemble of size 5 is requested
    # separately through fit_ensemble below.
    regressor = AutoSklearnRegressor(
        time_left_for_this_task=30,
        per_run_time_limit=5,
        dask_client=dask_client,
        ensemble_size=0,
    )

    assert regressor.fit(train_features, train_targets) is regressor

    assert regressor.fit_ensemble(train_targets, ensemble_size=5) is regressor

    # refit receives copies of the training data.
    assert regressor.refit(train_features.copy(),
                           train_targets.copy()) is regressor