def test_cv_regression(tmp_dir, output_dir, dask_client):
    """Check that a regressor can be fitted under the 'cv' resampling strategy."""
    dataset = putil.get_dataset('boston', train_size_maximum=300)
    X_train, Y_train, X_test, Y_test = dataset
    automl = AutoSklearnRegressor(
        resampling_strategy='cv',
        time_left_for_this_task=60,
        per_run_time_limit=10,
        tmp_folder=tmp_dir,
        dask_client=dask_client,
        output_folder=output_dir,
    )
    automl.fit(X_train, Y_train)

    # Debug log written by the AutoML run; attached to assertion messages.
    log_file_path = glob.glob(os.path.join(tmp_dir, 'AutoML*.log'))[0]

    preds = automl.predict(X_test)
    assert preds.shape == (206, )
    assert r2(Y_test, preds) >= 0.1, extract_msg_from_log(log_file_path)
    assert count_succeses(
        automl.cv_results_) > 0, extract_msg_from_log(log_file_path)
def test_regression_pandas_support(tmp_dir, output_dir, dask_client):
    """Regression end-to-end on pandas DataFrame/Series input.

    Fits, scores, refits and predicts on the same data; using
    train == test is deliberate — this is a code-path smoke test,
    not a generalization test.
    """
    X, y = sklearn.datasets.fetch_openml(
        data_id=41514,  # diabetes
        return_X_y=True,
        as_frame=True,
    )
    # This test only makes sense if the input really is a dataframe
    assert isinstance(X, pd.DataFrame)
    assert isinstance(y, pd.Series)
    automl = AutoSklearnRegressor(
        time_left_for_this_task=40,
        per_run_time_limit=5,
        dask_client=dask_client,
        tmp_folder=tmp_dir,
        output_folder=output_dir,
    )

    # NOTE(review): y is a numeric regression target, so fit proceeds
    # without label encoding — the previous comment claiming we "error
    # out because y is not encoded" was stale (copied from a
    # classification test).
    automl.fit(X, y)

    # Make sure that at least better than random.
    # We use same X_train==X_test to test code quality
    assert automl.score(X, y) >= 0.5, print_debug_information(automl)

    automl.refit(X, y)

    # Make sure that at least better than random.
    assert r2(y, automl.predict(X)) > 0.5, print_debug_information(automl)
    assert count_succeses(
        automl.cv_results_) > 0, print_debug_information(automl)
def test_binary(tmp_dir, output_dir, dask_client):
    """Fit a classifier on a binarized iris and validate its test predictions."""
    X_train, Y_train, X_test, Y_test = putil.get_dataset(
        'iris', make_binary=True)
    automl = AutoSklearnClassifier(
        time_left_for_this_task=40,
        per_run_time_limit=10,
        tmp_folder=tmp_dir,
        dask_client=dask_client,
        output_folder=output_dir,
    )
    automl.fit(X_train, Y_train,
               X_test=X_test, y_test=Y_test,
               dataset_name='binary_test_dataset')

    preds = automl.predict(X_test)
    assert preds.shape == (50, ), print_debug_information(automl)
    assert accuracy(Y_test, preds) > 0.9, print_debug_information(automl)
    assert count_succeses(
        automl.cv_results_) > 0, print_debug_information(automl)

    # Supplying X_test/y_test to fit is expected to dump test-set
    # prediction files named after the dataset into the output folder.
    output_files = glob.glob(
        os.path.join(output_dir, 'binary_test_dataset_test_*.predict'))
    assert len(output_files) > 0, (output_files, print_debug_information(automl))
def test_multilabel(tmp_dir, output_dir, dask_client):
    """Fit a classifier on a multilabel iris variant and validate its outputs."""
    X_train, Y_train, X_test, Y_test = putil.get_dataset(
        'iris', make_multilabel=True)
    automl = AutoSklearnClassifier(
        time_left_for_this_task=30,
        per_run_time_limit=5,
        tmp_folder=tmp_dir,
        dask_client=dask_client,
        output_folder=output_dir,
    )
    automl.fit(X_train, Y_train)

    preds = automl.predict(X_test)
    assert preds.shape == (50, 3), print_debug_information(automl)
    assert count_succeses(
        automl.cv_results_) > 0, print_debug_information(automl)
    assert includes_train_scores(automl.performance_over_time_.columns) is True
    assert performance_over_time_is_plausible(
        automl.performance_over_time_) is True
    assert f1_macro(Y_test, preds) >= 0.9, print_debug_information(automl)

    # Mean predicted probability is expected to land near 1/3 here.
    probs = automl.predict_proba(X_train)
    assert np.mean(probs) == pytest.approx(0.33, rel=1e-1)
def test_cv_regression(tmp_dir, dask_client):
    """Check that a regressor can be fitted under the 'cv' resampling strategy."""
    dataset = putil.get_dataset('boston', train_size_maximum=300)
    X_train, Y_train, X_test, Y_test = dataset
    automl = AutoSklearnRegressor(
        resampling_strategy='cv',
        time_left_for_this_task=60,
        per_run_time_limit=10,
        tmp_folder=tmp_dir,
        dask_client=dask_client,
    )
    automl.fit(X_train, Y_train)

    preds = automl.predict(X_test)
    assert preds.shape == (206, )
    assert r2(Y_test, preds) >= 0.1, print_debug_information(automl)
    assert count_succeses(
        automl.cv_results_) > 0, print_debug_information(automl)
    assert includes_train_scores(automl.performance_over_time_.columns) is True
    assert performance_over_time_is_plausible(
        automl.performance_over_time_) is True
def test_binary(tmp_dir, dask_client):
    """Binary classification end-to-end, keeping the tmp folder for inspection."""
    X_train, Y_train, X_test, Y_test = putil.get_dataset(
        'iris', make_binary=True)
    automl = AutoSklearnClassifier(
        time_left_for_this_task=40,
        per_run_time_limit=10,
        delete_tmp_folder_after_terminate=False,
        tmp_folder=tmp_dir,
        dask_client=dask_client,
    )
    automl.fit(
        X_train, Y_train,
        X_test=X_test, y_test=Y_test,
        dataset_name='binary_test_dataset',
    )

    preds = automl.predict(X_test)
    assert preds.shape == (50, ), print_debug_information(automl)
    assert accuracy(Y_test, preds) > 0.9, print_debug_information(automl)
    assert count_succeses(
        automl.cv_results_) > 0, print_debug_information(automl)
    assert includes_all_scores(automl.performance_over_time_.columns) is True
    assert performance_over_time_is_plausible(
        automl.performance_over_time_) is True
def test_can_pickle_classifier(tmp_dir, dask_client):
    """A fitted classifier must survive pickle and joblib round-trips intact."""
    X_train, Y_train, X_test, Y_test = putil.get_dataset('iris')
    automl = AutoSklearnClassifier(
        time_left_for_this_task=30,
        per_run_time_limit=5,
        delete_tmp_folder_after_terminate=False,
        tmp_folder=tmp_dir,
        dask_client=dask_client,
    )
    automl.fit(X_train, Y_train)

    baseline_preds = automl.predict(X_test)
    baseline_acc = sklearn.metrics.accuracy_score(Y_test, baseline_preds)
    assert baseline_acc >= 0.75
    assert count_succeses(automl.cv_results_) > 0
    assert includes_train_scores(automl.performance_over_time_.columns) is True
    assert performance_over_time_is_plausible(
        automl.performance_over_time_) is True

    # Round-trip through pickle: restored accuracy must match exactly.
    pickle_path = os.path.join(tmp_dir, 'automl.dump.pkl')
    with open(pickle_path, 'wb') as fh:
        pickle.dump(automl, fh)
    with open(pickle_path, 'rb') as fh:
        unpickled = pickle.load(fh)
    restored_acc = sklearn.metrics.accuracy_score(
        Y_test, unpickled.predict(X_test))
    assert restored_acc >= 0.75
    assert baseline_acc == restored_acc

    # Round-trip through joblib: same expectation.
    joblib_path = os.path.join(tmp_dir, 'automl.dump.joblib')
    joblib.dump(automl, joblib_path)
    reloaded = joblib.load(joblib_path)
    restored_acc = sklearn.metrics.accuracy_score(
        Y_test, reloaded.predict(X_test))
    assert restored_acc >= 0.75
    assert baseline_acc == restored_acc
def test_fit(dask_client, backend):
    """Smoke-test AutoML.fit on iris and verify the recorded task type."""
    X_train, Y_train, X_test, Y_test = putil.get_dataset('iris')
    automl = autosklearn.automl.AutoML(
        backend=backend,
        metric=accuracy,
        time_left_for_this_task=30,
        per_run_time_limit=5,
        dask_client=dask_client,
    )
    automl.fit(X_train, Y_train, task=MULTICLASS_CLASSIFICATION)

    assert automl.score(X_test, Y_test) > 0.8
    assert count_succeses(automl.cv_results_) > 0
    assert automl._task == MULTICLASS_CLASSIFICATION
    del automl
def test_fit_roar(dask_client_single_worker, backend):
    """End-to-end fit on iris using ROAR as the configuration optimizer.

    The nested callback follows the ``get_smac_object_callback``
    contract: it receives the scenario dict plus target-algorithm runner
    arguments and must return a configured optimizer facade (here ROAR).
    Its parameter names/order are part of that contract — do not reorder.
    """
    def get_roar_object_callback(
            scenario_dict,
            seed,
            ta,
            ta_kwargs,
            dask_client,
            n_jobs,
            **kwargs
    ):
        """Random online adaptive racing.

        http://ml.informatik.uni-freiburg.de/papers/11-LION5-SMAC.pdf"""
        scenario = Scenario(scenario_dict)
        return ROAR(
            scenario=scenario,
            rng=seed,
            tae_runner=ta,
            tae_runner_kwargs=ta_kwargs,
            dask_client=dask_client,
            n_jobs=n_jobs,
        )

    X_train, Y_train, X_test, Y_test = putil.get_dataset('iris')
    automl = autosklearn.automl.AutoML(
        backend=backend,
        time_left_for_this_task=30,
        per_run_time_limit=5,
        # Disable meta-learning so ROAR drives every configuration choice.
        initial_configurations_via_metalearning=0,
        get_smac_object_callback=get_roar_object_callback,
        metric=accuracy,
        dask_client=dask_client_single_worker,
    )
    automl.fit(
        X_train, Y_train, task=MULTICLASS_CLASSIFICATION,
    )
    score = automl.score(X_test, Y_test)
    assert score > 0.8
    assert count_succeses(automl.cv_results_) > 0
    assert includes_train_scores(automl.performance_over_time_.columns) is True
    assert automl._task == MULTICLASS_CLASSIFICATION
    del automl
def test_can_pickle_classifier(tmp_dir, output_dir, dask_client):
    """A fitted classifier must survive pickle and joblib round-trips intact."""
    X_train, Y_train, X_test, Y_test = putil.get_dataset('iris')
    automl = AutoSklearnClassifier(
        time_left_for_this_task=30,
        per_run_time_limit=5,
        tmp_folder=tmp_dir,
        dask_client=dask_client,
        output_folder=output_dir,
    )
    automl.fit(X_train, Y_train)

    baseline_preds = automl.predict(X_test)
    baseline_acc = sklearn.metrics.accuracy_score(Y_test, baseline_preds)
    assert baseline_acc >= 0.75
    assert count_succeses(automl.cv_results_) > 0

    # Round-trip through pickle: restored accuracy must match exactly.
    pickle_path = os.path.join(output_dir, 'automl.dump.pkl')
    with open(pickle_path, 'wb') as fh:
        pickle.dump(automl, fh)
    with open(pickle_path, 'rb') as fh:
        unpickled = pickle.load(fh)
    restored_acc = sklearn.metrics.accuracy_score(
        Y_test, unpickled.predict(X_test))
    assert restored_acc >= 0.75
    assert baseline_acc == restored_acc

    # Round-trip through joblib: same expectation.
    joblib_path = os.path.join(output_dir, 'automl.dump.joblib')
    joblib.dump(automl, joblib_path)
    reloaded = joblib.load(joblib_path)
    restored_acc = sklearn.metrics.accuracy_score(
        Y_test, reloaded.predict(X_test))
    assert restored_acc >= 0.75
    assert baseline_acc == restored_acc
def test_regression(tmp_dir, output_dir, dask_client):
    """Fit a regressor on boston and check the (negated) MSE bound."""
    X_train, Y_train, X_test, Y_test = putil.get_dataset('boston')
    automl = AutoSklearnRegressor(
        time_left_for_this_task=30,
        per_run_time_limit=5,
        tmp_folder=tmp_dir,
        dask_client=dask_client,
        output_folder=output_dir,
    )
    automl.fit(X_train, Y_train)

    preds = automl.predict(X_test)
    assert preds.shape == (356, )
    # On average np.sqrt(30) away from the target -> ~5.5 on average
    # Results with select rates drops avg score to a range of -32.40 to -37,
    # on 30 seconds constraint. With more time_left_for_this_task this is no
    # longer an issue
    assert mean_squared_error(Y_test, preds) >= -37, \
        print_debug_information(automl)
    assert count_succeses(automl.cv_results_) > 0
def test_fit(dask_client):
    """Fit AutoML with a fixed seed on iris and validate its basic outputs."""
    X_train, Y_train, X_test, Y_test = putil.get_dataset('iris')
    automl = autosklearn.automl.AutoML(
        seed=0,
        metric=accuracy,
        time_left_for_this_task=30,
        per_run_time_limit=5,
        dask_client=dask_client,
    )
    automl.fit(X_train, Y_train, task=MULTICLASS_CLASSIFICATION)

    assert automl.score(X_test, Y_test) > 0.8
    assert count_succeses(automl.cv_results_) > 0
    assert includes_train_scores(automl.performance_over_time_.columns) is True
    assert performance_over_time_is_plausible(
        automl.performance_over_time_) is True
    assert automl._task == MULTICLASS_CLASSIFICATION
    del automl
def test_classification_pandas_support(tmp_dir, output_dir, dask_client): X, y = sklearn.datasets.fetch_openml( data_id=2, # cat/num dataset return_X_y=True, as_frame=True, ) # Drop NAN!! X = X.dropna('columns') # This test only make sense if input is dataframe assert isinstance(X, pd.DataFrame) assert isinstance(y, pd.Series) automl = AutoSklearnClassifier( time_left_for_this_task=30, per_run_time_limit=5, exclude_estimators=['libsvm_svc'], dask_client=dask_client, seed=5, tmp_folder=tmp_dir, output_folder=output_dir, ) automl.fit(X, y) # Make sure that at least better than random. # We use same X_train==X_test to test code quality assert automl.score(X, y) > 0.555, print_debug_information(automl) automl.refit(X, y) # Make sure that at least better than random. # accuracy in sklearn needs valid data # It should be 0.555 as the dataset is unbalanced. prediction = automl.predict(X) assert accuracy(y, prediction) > 0.555 assert count_succeses(automl.cv_results_) > 0 assert includes_train_scores(automl.performance_over_time_.columns) is True assert performance_over_time_is_plausible( automl.performance_over_time_) is True
def test_classification_pandas_support(tmp_dir, output_dir, dask_client): X, y = sklearn.datasets.fetch_openml( data_id=2, # cat/num dataset return_X_y=True, as_frame=True, ) # Drop NAN!! X = X.dropna('columns') # This test only make sense if input is dataframe assert isinstance(X, pd.DataFrame) assert isinstance(y, pd.Series) automl = AutoSklearnClassifier( time_left_for_this_task=30, per_run_time_limit=5, exclude_estimators=['libsvm_svc'], dask_client=dask_client, seed=5, tmp_folder=tmp_dir, output_folder=output_dir, ) automl.fit(X, y) log_file_path = glob.glob(os.path.join(tmp_dir, 'AutoML*.log'))[0] # Make sure that at least better than random. # We use same X_train==X_test to test code quality assert automl.score(X, y) > 0.555, extract_msg_from_log(log_file_path) automl.refit(X, y) # Make sure that at least better than random. # accuracy in sklearn needs valid data # It should be 0.555 as the dataset is unbalanced. y = automl.automl_.InputValidator.encode_target(y) prediction = automl.automl_.InputValidator.encode_target(automl.predict(X)) assert accuracy(y, prediction) > 0.555 assert count_succeses(automl.cv_results_) > 0
def test_multilabel(tmp_dir, output_dir, dask_client):
    """Multilabel classification on an iris variant, checked against the log."""
    X_train, Y_train, X_test, Y_test = putil.get_dataset(
        'iris', make_multilabel=True)
    automl = AutoSklearnClassifier(
        time_left_for_this_task=30,
        per_run_time_limit=5,
        tmp_folder=tmp_dir,
        dask_client=dask_client,
        output_folder=output_dir,
    )
    automl.fit(X_train, Y_train)

    # Debug log written by the AutoML run; attached to assertion messages.
    log_file_path = glob.glob(os.path.join(tmp_dir, 'AutoML*.log'))[0]

    preds = automl.predict(X_test)
    assert preds.shape == (50, 3), extract_msg_from_log(log_file_path)
    assert count_succeses(
        automl.cv_results_) > 0, extract_msg_from_log(log_file_path)
    assert f1_macro(Y_test, preds) >= 0.9, extract_msg_from_log(log_file_path)

    # Mean predicted probability is expected to land near 1/3 here.
    probs = automl.predict_proba(X_train)
    assert np.mean(probs) == pytest.approx(0.33, rel=1e-1)
def test_fit_n_jobs(tmp_dir, output_dir):
    """Run a parallel (n_jobs=2) fit and check the job count propagates to SMAC
    and that every runhistory entry has a matching ensemble-prediction file.
    """
    X_train, Y_train, X_test, Y_test = putil.get_dataset('breast_cancer')

    # test parallel Classifier to predict classes, not only indices
    Y_train += 1
    Y_test += 1

    # Wraps the SMAC-object factory so the test can observe the n_jobs
    # value handed to it and the worker counts of the dask client it builds.
    class get_smac_object_wrapper:

        def __call__(self, *args, **kwargs):
            self.n_jobs = kwargs['n_jobs']
            smac = get_smac_object(*args, **kwargs)
            self.dask_n_jobs = smac.solver.tae_runner.n_workers
            self.dask_client_n_jobs = len(
                smac.solver.tae_runner.client.scheduler_info()['workers'])
            return smac
    get_smac_object_wrapper_instance = get_smac_object_wrapper()

    automl = AutoSklearnClassifier(
        time_left_for_this_task=30,
        per_run_time_limit=5,
        output_folder=output_dir,
        tmp_folder=tmp_dir,
        seed=1,
        initial_configurations_via_metalearning=0,
        ensemble_size=5,
        n_jobs=2,
        include_estimators=['sgd'],
        include_preprocessors=['no_preprocessing'],
        get_smac_object_callback=get_smac_object_wrapper_instance,
        max_models_on_disc=None,
    )
    automl.fit(X_train, Y_train)

    # Test that the argument is correctly passed to SMAC
    assert getattr(get_smac_object_wrapper_instance, 'n_jobs') == 2
    assert getattr(get_smac_object_wrapper_instance, 'dask_n_jobs') == 2
    assert getattr(get_smac_object_wrapper_instance, 'dask_client_n_jobs') == 2

    # Collect every num_run recorded in the runhistory...
    available_num_runs = set()
    for run_key, run_value in automl.automl_.runhistory_.data.items():
        if run_value.additional_info is not None and 'num_run' in run_value.additional_info:
            available_num_runs.add(run_value.additional_info['num_run'])

    # ...and every num_run for which an ensemble-prediction file exists
    # on disk, parsing seed/num_run out of the file names via MODEL_FN_RE.
    available_predictions = set()
    predictions = glob.glob(
        os.path.join(automl.automl_._backend.get_runs_directory(),
                     '*', 'predictions_ensemble*.npy'))
    seeds = set()
    for prediction in predictions:
        prediction = os.path.split(prediction)[1]
        match = re.match(
            MODEL_FN_RE, prediction.replace("predictions_ensemble", ""))
        if match:
            num_run = int(match.group(2))
            available_predictions.add(num_run)
            seed = int(match.group(1))
            seeds.add(seed)

    # Remove the dummy prediction, it is not part of the runhistory
    available_predictions.remove(1)
    assert available_num_runs.issubset(available_predictions)

    # All prediction files must come from the single configured seed.
    assert len(seeds) == 1

    # The same single-seed property must hold for the ensemble files.
    ensemble_dir = automl.automl_._backend.get_ensemble_dir()
    ensembles = os.listdir(ensemble_dir)
    seeds = set()
    for ensemble_file in ensembles:
        seeds.add(int(ensemble_file.split('.')[0].split('_')[0]))
    assert len(seeds) == 1

    assert count_succeses(automl.cv_results_) > 0
    # For travis-ci it is important that the client no longer exists
    assert automl.automl_._dask_client is None