def test_autosklearn_classification_methods_returns_self(dask_client):
    """Check that fit(), fit_ensemble() and refit() return the estimator.

    An AutoSklearnClassifier is trained on the 'iris' dataset and each of
    the three methods is called in turn; every call must hand back the very
    same object it was invoked on.
    """
    # Only the training split is needed; the test split is discarded.
    features, targets, _, _ = putil.get_dataset('iris')

    classifier = AutoSklearnClassifier(
        time_left_for_this_task=60,
        per_run_time_limit=10,
        ensemble_size=0,
        dask_client=dask_client,
        exclude_preprocessors=['fast_ica'],
    )

    returned_by_fit = classifier.fit(features, targets)
    assert classifier is returned_by_fit

    # The estimator was created with ensemble_size=0, so the ensemble is
    # built here as a separate step.
    returned_by_fit_ensemble = classifier.fit_ensemble(targets, ensemble_size=5)
    assert classifier is returned_by_fit_ensemble

    # refit() receives copies of the training data.
    returned_by_refit = classifier.refit(features.copy(), targets.copy())
    assert classifier is returned_by_refit
def test_autosklearn_anneal(as_frame):
    """Fit and score AutoSklearnClassifier on the anneal dataset.

    The data is fetched from OpenML (data_id=2), either as pandas objects or
    as arrays depending on ``as_frame``. The dataset is fairly complex (NaN,
    categorical and numerical columns), which makes it a good unit-test case.
    """
    X, y = sklearn.datasets.fetch_openml(
        data_id=2,
        return_X_y=True,
        as_frame=as_frame,
    )

    classifier = AutoSklearnClassifier(
        time_left_for_this_task=60,
        ensemble_size=0,
        delete_tmp_folder_after_terminate=False,
        initial_configurations_via_metalearning=0,
        smac_scenario_args={'runcount_limit': 6},
        resampling_strategy='holdout-iterative-fit',
    )

    if not as_frame:
        # Without a dataframe the feature types are passed explicitly; they
        # are derived from the column dtypes of the dataframe version of the
        # same dataset.
        reference_frame, _ = sklearn.datasets.fetch_openml(
            data_id=2,
            return_X_y=True,
            as_frame=True,
        )
        feat_type = []
        for column in reference_frame.columns:
            is_category = reference_frame[column].dtype.name == 'category'
            feat_type.append('categorical' if is_category else 'numerical')
        fitted = classifier.fit(X, y, feat_type=feat_type)
    else:
        # Let autosklearn work out the feature types on its own.
        fitted = classifier.fit(X, y)
    assert classifier is fitted

    with_ensemble = classifier.fit_ensemble(y, ensemble_size=5)
    assert classifier is with_ensemble

    # Make sure something can actually be learned from this data. This
    # verifies that the data format (numpy/pandas) is usable in a meaningful
    # way; it is not a generalization check, hence scoring on the training
    # data itself.
    assert fitted.score(X, y) > 0.75